* support/Util/NewAlloc.c (freeTrace): Changed free for the gc case to not free...

[fw/sdcc] / src / altlex.c
diff --git a/src/altlex.c b/src/altlex.c

index da1862bdf97880e4579965a903d056f84a7c203e..5b84d12d17ca6df790442a26cfd0a376f572c25b 100644 (file)
--- a/src/altlex.c
+++ b/src/altlex.c
@@ -1,485 +1,1217 @@
  /** @file altlex.c
      An alternate lexer to SDCC.lex.
      In development - ie messy and just plain wrong.
+    Inspired by the gcc lexer, c-lex.c.
  */
  #include "common.h"
+#include "reswords.h"
  #include <assert.h>
  
+#define DUMP_OUTPUT            0
+
+/* Right.  What are the parts of the C stream?  From SDCC.lex:
+   D = [0..9]           digits
+   L = [a..z A..Z _]    alphanumerics and _
+   H = [a..f A..F 0-9]  Hex digits
+   E = [eE+-0-9]        Digits in a float
+   FS = [fFlL]          Specifiers for a float
+   IS = [uUlL]          Specifiers for a int
+
+   L[LD]*               A 'token' - cant think of a good name
+   Check tokens against the reserved words.
+   If match 
+   return the token id.
+   else 
+   If in the typedef table, do stuff...
+   Blah.  See check_type()
+   0[xX]{H}+            Hex number - PENDING: specifiers
+   0{D}+                Octal number - PENDING: specifiers
+   {D}+                 Decimal - PENDING: specifiers
+   Floats               PENDING
+
+   Exceptions:
+   Comment start        Strip until end of comment.
+   ...                  Ellipses
+
+   So the inputs are:
+   Skip whitespace
+   switch class
+   L    Try to read a token
+   D    Try to read a number
+   Punct        Try to read punct
+ */
+
+extern int fatalError;
+extern int lineno;
+extern char *filename;
+
  FILE *yyin;
  
  int yylineno;
-int column;
  char *currFname;
  char *yytext;
  
-/* Right.  What are the parts of the C stream?  From SDCC.lex:
-   D = [0..9]          digits
-   L = [a..z A..Z _]   alphanumerics and _
-   H = [a..f A..F 0-9] Hex digits
-   E = [eE+-0-9]       Digits in a float
-   FS = [fFlL]         Specifiers for a float
-   IS = [uUlL]         Specifiers for a int
-
-   L[LD]*                      A 'token' - cant think of a good name
-                       Check tokens against the reserved words.
-                       If match 
-                               return the token id.
-                       else 
-                               If in the typedef table, do stuff...
-                               Blah.  See check_type()
-   0[xX]{H}+           Hex number - PENDING: specifiers
-   0{D}+               Octal number - PENDING: specifiers
-   {D}+                        Decimal - PENDING: specifiers
-   Floats              PENDING
-   
-   Exceptions:
-   Comment start       Strip until end of comment.
-   ...                 Ellipses
+static char linebuf[10000];
+static int linepos, linelen;
+static int end_of_file;
  
-   So the inputs are:
-      Skip whitespace
-      switch class
-        L      Try to read a token
-        D      Try to read a number
-        Punct  Try to read punct
-*/     
+#ifdef __GNUC__
+#define INLINE inline
+#else
+#define INLINE
+#endif
  
-char linebuf[10000];
-int linepos, linelen;
+#define ERRSINK stderr
  
-#define INLINE inline
+static void 
+error (const char *sz,...)
+{
+  va_list ap;
+  fatalError++;
+
+  if (filename && lineno)
+    {
+      fprintf (ERRSINK, "%s(%d):", filename, lineno);
+    }
+  fprintf (ERRSINK, "error *** ");
+  va_start (ap, sz);
+  vfprintf (ERRSINK, sz, ap);
+  va_end (ap);
+  fprintf (ERRSINK, "\n");
+  fflush (ERRSINK);
+}
  
-static int underflow(void)
+static int 
+underflow (void)
  {
-    linelen = fread(linebuf, 1, sizeof(linebuf), yyin);
-    if (linelen <= 0)
-       return EOF;
-    linepos = 0;
-    return linebuf[linepos++];
+  linelen = fread (linebuf, 1, sizeof (linebuf), yyin);
+  if (linelen <= 0)
+    return EOF;
+  linepos = 0;
+  return linebuf[linepos++];
  }
  
-static int INLINE ygetc(void)
+static int INLINE 
+ygetc (void)
  {
-    if (linepos < linelen)
-       return linebuf[linepos++];
-    else
-       return underflow();
+  if (linepos < linelen)
+    return linebuf[linepos++];
+  else
+    return underflow ();
  };
  
-static int INLINE yungetc(int c)
+static int INLINE 
+yungetc (int c)
  {
-    linebuf[--linepos] = c;
-    return 0;
+  linebuf[--linepos] = c;
+  return 0;
  }
  
  #define GETC()         ygetc()
  #define UNGETC(_a)     yungetc(_a)
  
-//#define GETC()               fgetc(yyin);
-//#define UNGETC(_a)   ungetc(_a, yyin)
+//#define GETC()                fgetc(yyin);
+//#define UNGETC(_a)    ungetc(_a, yyin)
  #define ISL(_a)                (isalnum(_a) || _a == '_')
  #define ISALNUM(_a)    isalnum(_a)
-#define ISHEX(_a)      isalnum(_a)
+#define ISHEX(_a)      isxdigit(_a)
  
-char *stringLiteral (void)
+static char *
+stringLiteral (void)
  {
-    static char line[1000];
-    int ch;
-    char *str = line;
-       
-    *str++ = '\"'                      ;
-    /* put into the buffer till we hit the */
-    /* first \" */
-    while (1) {
-
-       ch = GETC();
-       if (!ch)            break       ; /* end of input */
-       /* if it is a \ then everything allowed */
-       if (ch == '\\') {
-           *str++ = ch     ; /* backslash in place */
-           *str++ = GETC()             ; /* following char in place */
-           continue                    ;      /* carry on */
+  static char line[1000];
+  int ch;
+  char *str = line;
+
+  *str++ = '\"';
+  /* put into the buffer till we hit the */
+  /* first \" */
+  while (1)
+    {
+
+      ch = GETC ();
+      if (!ch)
+       break;                  /* end of input */
+      /* if it is a \ then everything allowed */
+      if (ch == '\\')
+       {
+         *str++ = ch;          /* backslash in place */
+         *str++ = GETC ();     /* following char in place */
+         continue;             /* carry on */
         }
-            
-       /* if new line we have a new line break */
-       if (ch == '\n') break           ;
-        
-       /* if this is a quote then we have work to do */
-       /* find the next non whitespace character     */
-       /* if that is a double quote then carry on    */
-       if (ch == '\"') {
-        
-           while ((ch = GETC()) && isspace(ch)) ;
-           if (!ch) break              ; 
-           if (ch != '\"') {
-               UNGETC(ch)                      ;
-               break                   ;
+
+      /* if new line we have a new line break */
+      if (ch == '\n')
+       break;
+
+      /* if this is a quote then we have work to do */
+      /* find the next non whitespace character     */
+      /* if that is a double quote then carry on    */
+      if (ch == '\"')
+       {
+
+         while ((ch = GETC ()) && isspace (ch));
+         if (!ch)
+           break;
+         if (ch != '\"')
+           {
+             UNGETC (ch);
+             break;
             }
-                 
-           continue            ;
-        }
-       *str++  = ch;     
-    }  
-    *str++ = '\"'                      ;
-    *str = '\0';
-    return line;
+
+         continue;
+       }
+      *str++ = ch;
+    }
+  *str++ = '\"';
+  *str = '\0';
+  return line;
  }
  
-void discard_comments(int type)
+static void 
+discard_comments (int type)
  {
-    int c;
-    if (type == '*') {
-       do {
-           c = GETC();
-           if (c == '*') {
-               c = GETC();
-               if (c == '/')
-                   return;
-           }
-           else if (c == EOF)
+  int c;
+  if (type == '*')
+    {
+      do
+       {
+         c = GETC ();
+         if (c == '*')
+           {
+             c = GETC ();
+             if (c == '/')
                 return;
-       } while (1);
+           }
+         else if (c == EOF)
+           return;
+       }
+      while (1);
      }
-    else if (type == '/') {
-       while (c != '\n' && c != EOF) {
-           c = GETC();
+  else if (type == '/')
+    {
+      do
+       {
+         c = GETC ();
         }
+      while (c != '\n' && c != EOF);
      }
-    else {
-       assert(0);
+  else
+    {
+      assert (0);
      }
  }
  
-#define TKEYWORD(_a)   return _a
-
-int check_token(const char *sz)
+/* will return 1 if the string is a part
+   of a target specific keyword */
+static INLINE int 
+isTargetKeyword (const char *s)
  {
-    if (!strcmp(sz, "at")) {
-       TKEYWORD(AT)  ; }
-    
-    else if (!strcmp(sz, "auto")) {
-        return(AUTO); }
-    
-    else if (!strcmp(sz, "bit")) {
-        TKEYWORD(BIT) ; }
-    
-    else if (!strcmp(sz, "break")) {
-        return(BREAK); }
-
-    else if (!strcmp(sz, "case")) {
-        return(CASE); }
-
-    else if (!strcmp(sz, "char")) {
-        return(CHAR); }
-
-    else if (!strcmp(sz, "code")) {
-        TKEYWORD(CODE); }
-
-    else if (!strcmp(sz, "const")) {
-        return(CONST); }
-
-    else if (!strcmp(sz, "continue")) {
-        return(CONTINUE); }
-
-    else if (!strcmp(sz, "critical")) {
-        TKEYWORD(CRITICAL); } 
-
-    else if (!strcmp(sz, "data")) {
-        TKEYWORD(DATA);   }
-
-    else if (!strcmp(sz, "default")) {
-        return(DEFAULT); }
-
-    else if (!strcmp(sz, "do")) {
-        return(DO); }
-
-    else if (!strcmp(sz, "double")) {
-        werror(W_DOUBLE_UNSUPPORTED);return(FLOAT); }
+  int i;
  
-    else if (!strcmp(sz, "else")) {
-        return(ELSE); }
-
-    else if (!strcmp(sz, "enum")) {
-        return(ENUM); }
-
-    else if (!strcmp(sz, "extern")) {
-        return(EXTERN); }
-
-    else if (!strcmp(sz, "far")) {
-        TKEYWORD(XDATA);  }
-
-    else if (!strcmp(sz, "eeprom")) {
-        TKEYWORD(EEPROM);  }
-
-    else if (!strcmp(sz, "float")) {
-        return(FLOAT); }
-
-    else if (!strcmp(sz, "flash")) {
-        TKEYWORD(CODE);}
-
-    else if (!strcmp(sz, "for")) {
-        return(FOR); }
-
-    else if (!strcmp(sz, "goto")) {
-        return(GOTO); }
-
-    else if (!strcmp(sz, "idata")) {
-        TKEYWORD(IDATA);}
-
-    else if (!strcmp(sz, "if")) {
-        return(IF); }
-
-    else if (!strcmp(sz, "int")) {
-        return(INT); }
-
-    else if (!strcmp(sz, "interrupt")) {
-        return(INTERRUPT);}
-
-    else if (!strcmp(sz, "nonbanked")) {
-        TKEYWORD(NONBANKED);}
-
-    else if (!strcmp(sz, "banked")) {
-        TKEYWORD(BANKED);}
-
-    else if (!strcmp(sz, "long")) {
-        return(LONG); }
-
-    else if (!strcmp(sz, "near")) {
-        TKEYWORD(DATA);}
-
-    else if (!strcmp(sz, "pdata")) {
-        TKEYWORD(PDATA); }
-
-    else if (!strcmp(sz, "reentrant")) {
-        TKEYWORD(REENTRANT);}
-
-    else if (!strcmp(sz, "register")) {
-        return(REGISTER); }
-
-    else if (!strcmp(sz, "return")) {
-        return(RETURN); }
-
-    else if (!strcmp(sz, "sfr")) {
-        TKEYWORD(SFR)  ; }
-
-    else if (!strcmp(sz, "sbit")) {
-        TKEYWORD(SBIT) ; }
-
-    else if (!strcmp(sz, "short")) {
-        return(SHORT); }
-
-    else if (!strcmp(sz, "signed")) {
-        return(SIGNED); }
-
-    else if (!strcmp(sz, "sizeof")) {
-        return(SIZEOF); }
-
-    else if (!strcmp(sz, "sram")) {
-        TKEYWORD(XDATA);}
-
-    else if (!strcmp(sz, "static")) {
-        return(STATIC); }
-
-    else if (!strcmp(sz, "struct")) {
-        return(STRUCT); }
+  if (port->keywords == NULL)
+    return 0;
+  for (i = 0; port->keywords[i]; i++)
+    {
+      if (strcmp (port->keywords[i], s) == 0)
+       return 1;
+    }
  
-    else if (!strcmp(sz, "switch")) {
-        return(SWITCH); }
+  return 0;
+}
  
-    else if (!strcmp(sz, "typedef")) {
-        return(TYPEDEF); }
+static INLINE int 
+check_token (const char *sz)
+{
+  const struct reserved_words *p;
+  p = is_reserved_word (sz, strlen (sz));
+  if (p)
+    {
+      if (!p->is_special || isTargetKeyword (sz))
+       return p->token;
+    }
  
-    else if (!strcmp(sz, "union")) {
-        return(UNION); }
+  /* check if it is in the typedef table */
+  if (findSym (TypedefTab, NULL, sz))
+    {
+      strcpy (yylval.yychar, sz);
+      return TYPE_NAME;
+    }
+  else
+    {
+      strcpy (yylval.yychar, sz);
+      return IDENTIFIER;
+    }
+}
  
-    else if (!strcmp(sz, "unsigned")) {
-        return(UNSIGNED); }
+static void 
+handle_pragma (void)
+{
+  char line[128], *p;
+  int c;
+
+  c = GETC ();
+  while (c == '\t' || c == ' ')
+    c = GETC ();
+  p = line;
+  while (!isspace (c))
+    {
+      *p++ = c;
+      c = GETC ();
+    }
+  *p = '\0';
+  if (line[0] == '\0')
+    error ("Missing argument to pragma");
+  else
+    {
+      /* First give the port a chance */
+      if (port->process_pragma && !port->process_pragma (line))
+       return;
+      /* PENDING: all the SDCC shared pragmas */
+      /* Nothing handled it */
+      error ("Unrecognised #pragma %s", line);
+    }
+}
  
-    else if (!strcmp(sz, "void")) {
-        return(VOID); }
+static void 
+handle_line (void)
+{
+  int c;
+  char line[128], *p;
+
+  c = GETC ();
+  while (c == '\t' || c == ' ')
+    c = GETC ();
+  p = line;
+  while (isdigit (c))
+    {
+      *p++ = c;
+      c = GETC ();
+    }
+  *p = '\0';
+  if (line[0] == '\0')
+    error ("Error in number in #line");
+  /* This is weird but cpp seems to add an extra three to the line no */
+  yylineno = atoi (line) - 3;
+  lineno = yylineno;
+  /* Fetch the filename if there is one */
+  while (c == '\t' || c == ' ')
+    c = GETC ();
+  if (c == '\"')
+    {
+      p = line;
+      c = GETC ();
+      while (c != '\"' && c != EOF && c != '\n')
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      if (c == '\"')
+       {
+         *p = '\0';
+         currFname = Safe_strdup (line);
+       }
+      filename = currFname;
+    }
+}
  
-    else if (!strcmp(sz, "volatile")) {
-        return(VOLATILE); }
+static INLINE void 
+invalid_directive (void)
+{
+  error ("Invalid directive");
+}
  
-    else if (!strcmp(sz, "using")) {
-        TKEYWORD(USING); }
+static INLINE int 
+check_newline (void)
+{
+  int c;
+  yylineno++;
+  lineno = yylineno;
+
+  /* Skip any leading white space */
+  c = GETC ();
+  while (c == '\t' || c == ' ')
+    c = GETC ();
+  /* Were only interested in #something */
+  if (c != '#')
+    return c;
+  c = GETC ();
+  while (c == '\t' || c == ' ')
+    c = GETC ();
+  /* The text in the stream is the type of directive */
+  switch (c)
+    {
+    case 'l':
+      /* Start of line? */
+      if (GETC () == 'i' && GETC () == 'n' && GETC () == 'e')
+       {
+         c = GETC ();
+         if (c == '\t' || c == ' ')
+           handle_line ();
+         else
+           invalid_directive ();
+       }
+      else
+       invalid_directive ();
+      break;
+    case 'p':
+      /* Start of pragma? */
+      if (GETC () == 'r' && GETC () == 'a' && GETC () == 'g' &&
+         GETC () == 'm' && GETC () == 'a')
+       {
+         c = GETC ();
+         if (c == '\t' || c == ' ')
+           handle_pragma ();
+         else
+           invalid_directive ();
+       }
+      else
+       invalid_directive ();
+      break;
+    default:
+      invalid_directive ();
+    }
+  /* Discard from here until the start of the next line */
+  while (c != '\n' && c != EOF)
+    c = GETC ();
+  return c;
+}
  
-    else if (!strcmp(sz, "while")) {
-        return(WHILE); }
+static int 
+skip_whitespace (int c)
+{
+  while (1)
+    {
+      switch (c)
+       {
+       case ' ':
+       case '\t':
+       case '\f':
+       case '\v':
+       case '\b':
+       case '\r':
+         c = GETC ();
+         break;
+       case '\n':
+         c = check_newline ();
+       default:
+         return c;
+       }
+    }
+}
  
-    else if (!strcmp(sz, "xdata")) {
-        TKEYWORD(XDATA); }
+void 
+yyerror (const char *s)
+{
+  if (end_of_file)
+    error ("%s at end of of input", s);
+  else if (yytext[0] == '\0')
+    error ("%s at null character", s);
+  else if (yytext[0] == '"')
+    error ("%s before string constant", s);
+  else if (yytext[0] == '\'')
+    error ("%s before character constant", s);
+  else
+    error ("%s before %s", s, yytext);
+}
  
-    else if (!strcmp(sz, "_data")) {
-        TKEYWORD(_NEAR); }
+static int 
+_yylex (void)
+{
+  int c;
+  static char line[128];
+  char *p;
  
-    else if (!strcmp(sz, "_code")) {
-        TKEYWORD(_CODE); }
+  yytext = line;
  
-    else if (!strcmp(sz, "_eeprom")) {
-        TKEYWORD(_EEPROM); }
+  c = GETC ();
+  while (1)
+    {
+      switch (c)
+       {
+       case ' ':
+       case '\t':
+       case '\f':
+       case '\v':
+       case '\b':
+         /* Skip whitespace */
+         c = GETC ();
+         break;
+       case '\r':
+       case '\n':
+         c = skip_whitespace (c);
+         break;
+       case '#':
+         UNGETC (c);
+         c = check_newline ();
+         break;
+       default:
+         goto past_ws;
+       }
+    }
  
-    else if (!strcmp(sz, "_flash")) {
-        TKEYWORD(_CODE); }
+past_ws:
+  /* Handle comments first */
+  if (c == '/')
+    {
+      int c2 = GETC ();
+      if (c2 == '*' || c2 == '/')
+       {
+         discard_comments (c2);
+         c = GETC ();
+       }
+      else
+       UNGETC (c2);
+    }
+  switch (c)
+    {
+    case EOF:
+      end_of_file = TRUE;
+      line[0] = '\0';
+      return 0;
+    case 'a':
+    case 'b':
+    case 'c':
+    case 'd':
+    case 'e':
+    case 'f':
+    case 'g':
+    case 'h':
+    case 'i':
+    case 'j':
+    case 'k':
+    case 'l':
+    case 'm':
+    case 'n':
+    case 'o':
+    case 'p':
+    case 'q':
+    case 'r':
+    case 's':
+    case 't':
+    case 'u':
+    case 'v':
+    case 'w':
+    case 'x':
+    case 'y':
+    case 'z':
+    case 'A':
+    case 'B':
+    case 'C':
+    case 'D':
+    case 'E':
+    case 'F':
+    case 'G':
+    case 'H':
+    case 'I':
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+    case 'Q':
+    case 'R':
+    case 'S':
+    case 'T':
+    case 'U':
+    case 'V':
+    case 'W':
+    case 'X':
+    case 'Y':
+    case 'Z':
+    case '_':
+      /* Start of a token.  Parse. */
+      p = line;
+      *p++ = c;
+      c = GETC ();
+      while (ISL (c))
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      *p = '\0';
+      UNGETC (c);
+      return check_token (line);
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      p = line;
+      *p++ = c;
+      c = GETC ();
+      if (c == 'x' || c == 'X')
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      while (ISHEX (c))
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      if (c == 'U' || c == 'u' || c == 'L' || c == 'l')
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      if (c == 'U' || c == 'u' || c == 'L' || c == 'l')
+       {
+         *p++ = c;
+         c = GETC ();
+       }
+      *p = '\0';
+      UNGETC (c);
+      yylval.val = constVal (line);
+      return CONSTANT;
+    case '\"':
+      /* A string */
+      p = stringLiteral ();
+      yylval.val = strVal (p);
+      return (STRING_LITERAL);
+    case '\'':
+      /* Possible formats:
+         ['\n', '\\', '\'', '\"'...]
+         ['a'...]
+       */
+      p = line;
+      *p++ = c;
+      c = GETC ();
+      if (c == '\\')
+       {
+         *p++ = c;
+         c = GETC ();
+         /* Fall through */
+       }
+      *p++ = c;
+      c = GETC ();
+      *p++ = c;
+      *p = '\0';
+      if (c != '\'')
+       {
+         error ("Unrecognised character constant %s", line);
+       }
+      yylval.val = charVal (line);
+      return CONSTANT;
+    case '=':
+    case '&':
+    case '!':
+    case '-':
+    case '+':
+    case '*':
+    case '/':
+    case '%':
+    case '<':
+    case '>':
+    case '^':
+    case '|':
+      {
+       /* Cases which can be compounds */
+       /* The types and classes of composites are:
+          >>= <<=
+          += -= *= /= %= &= ^= |=
+          >> << ++ --
+          && ||
+          <= >= == !=
+          ->
+          So a composite started by char 'x' can be:
+          1. Followed by itself then an equals
+          2. Followed by itself
+          3. Followed by an equals
+          4. Be a '->'
+          5. Be by itself
+        */
+       int next = GETC ();
+       /* Class 1 and 2 */
+       if (next == c)
+         {
+           next = GETC ();
+           /* Class 1 */
+           if (next == '=')
+             {
+               switch (c)
+                 {
+                 case '>':     // >>=
+
+                   yylval.yyint = RIGHT_ASSIGN;
+                   return RIGHT_ASSIGN;
+                 case '<':     // <<=
+
+                   yylval.yyint = LEFT_ASSIGN;
+                   return LEFT_ASSIGN;
+                 default:
+                   error ("Unrecognised token %c%c=", c, c);
+                 }
+             }
+           else
+             {
+               /* Push the next char back on and find the class */
+               UNGETC (next);
+               /* Case 2 */
+               switch (c)
+                 {
+                 case '>':     // >>
+
+                   return RIGHT_OP;
+                 case '<':     // <<
+
+                   return LEFT_OP;
+                 case '+':
+                   return INC_OP;
+                 case '-':
+                   return DEC_OP;
+                 case '&':
+                   return AND_OP;
+                 case '|':
+                   return OR_OP;
+                 case '=':
+                   return EQ_OP;
+                 default:
+                   error ("Unrecognised token %c%c", c, c);
+                 }
+             }
+         }
+       /* Case 3 */
+       else if (next == '=')
+         {
+           int result = 0;
+           switch (c)
+             {
+             case '+':
+               result = ADD_ASSIGN;
+               break;
+             case '-':
+               result = SUB_ASSIGN;
+               break;
+             case '*':
+               result = MUL_ASSIGN;
+               break;
+             case '/':
+               result = DIV_ASSIGN;
+               break;
+             case '%':
+               result = MOD_ASSIGN;
+               break;
+             case '&':
+               result = AND_ASSIGN;
+               break;
+             case '^':
+               result = XOR_ASSIGN;
+               break;
+             case '|':
+               result = OR_ASSIGN;
+               break;
+             case '<':
+               result = LE_OP;
+               break;
+             case '>':
+               result = GE_OP;
+               break;
+             case '!':
+               result = NE_OP;
+               break;
+             default:
+               error ("Unrecognised token %c=", c);
+             }
+           if (result)
+             {
+               yylval.yyint = result;
+               return result;
+             }
+         }
+       /* Case 4 */
+       else if (c == '-' && next == '>')
+         {
+           return PTR_OP;
+         }
+       /* Case 5 */
+       else
+         {
+           UNGETC (next);
+           return c;
+         }
+       break;
+      }
+    case '{':
+      NestLevel++;
+      return c;
+    case '}':
+      NestLevel--;
+      return c;
+    case '.':
+      c = GETC ();
+      if (c == '.')
+       {
+         c = GETC ();
+         if (c == '.')
+           {
+             return VAR_ARGS;
+           }
+       }
+      UNGETC (c);
+      return '.';
+    case '[':
+    case ']':
+      return c;
+    case ',':
+    case ':':
+    case '(':
+    case ')':
+    case '~':
+    case '?':
+    case ';':
+      /* Special characters that cant be part of a composite */
+      return c;
+    default:
+      error ("Unhandled character %c", c);
+    }
+  return 0;
+}
  
-    else if (!strcmp(sz, "_generic")) {
-        TKEYWORD(_GENERIC); }
+#define ENTRY(_a)      case (_a): printf(#_a); break;
  
-    else if (!strcmp(sz, "_near")) {
-        TKEYWORD(_NEAR); }
+int 
+yylex (void)
+{
+  int ret = _yylex ();
+#if DUMP_OUTPUT
+  static int lastpos = 0;
+  char tmp;
+
+  printf ("Returning ");
+  switch (ret)
+    {
+      /* Wrapper */
+      ENTRY (IDENTIFIER);
+      ENTRY (TYPE_NAME);
+      ENTRY (CONSTANT);
+      ENTRY (STRING_LITERAL);
+      ENTRY (SIZEOF);
+      ENTRY (PTR_OP);
+      ENTRY (INC_OP);
+      ENTRY (DEC_OP);
+      ENTRY (LEFT_OP);
+      ENTRY (RIGHT_OP);
+      ENTRY (LE_OP);
+      ENTRY (GE_OP);
+      ENTRY (EQ_OP);
+      ENTRY (NE_OP);
+      ENTRY (AND_OP);
+      ENTRY (OR_OP);
+      ENTRY (MUL_ASSIGN);
+      ENTRY (DIV_ASSIGN);
+      ENTRY (MOD_ASSIGN);
+      ENTRY (ADD_ASSIGN);
+      ENTRY (SUB_ASSIGN);
+      ENTRY (LEFT_ASSIGN);
+      ENTRY (RIGHT_ASSIGN);
+      ENTRY (AND_ASSIGN);
+      ENTRY (XOR_ASSIGN);
+      ENTRY (OR_ASSIGN);
+      ENTRY (TYPEDEF);
+      ENTRY (EXTERN);
+      ENTRY (STATIC);
+      ENTRY (AUTO);
+      ENTRY (REGISTER);
+      ENTRY (CODE);
+      ENTRY (EEPROM);
+      ENTRY (INTERRUPT);
+      ENTRY (SFR);
+      ENTRY (AT);
+      ENTRY (SBIT);
+      ENTRY (REENTRANT);
+      ENTRY (USING);
+      ENTRY (XDATA);
+      ENTRY (DATA);
+      ENTRY (IDATA);
+      ENTRY (PDATA);
+      ENTRY (VAR_ARGS);
+      ENTRY (CRITICAL);
+      ENTRY (NONBANKED);
+      ENTRY (BANKED);
+      ENTRY (CHAR);
+      ENTRY (SHORT);
+      ENTRY (INT);
+      ENTRY (LONG);
+      ENTRY (SIGNED);
+      ENTRY (UNSIGNED);
+      ENTRY (FLOAT);
+      ENTRY (DOUBLE);
+      ENTRY (CONST);
+      ENTRY (VOLATILE);
+      ENTRY (VOID);
+      ENTRY (BIT);
+      ENTRY (STRUCT);
+      ENTRY (UNION);
+      ENTRY (ENUM);
+      ENTRY (ELIPSIS);
+      ENTRY (RANGE);
+      ENTRY (FAR);
+      ENTRY (_XDATA);
+      ENTRY (_CODE);
+      ENTRY (_GENERIC);
+      ENTRY (_NEAR);
+      ENTRY (_PDATA);
+      ENTRY (_IDATA);
+      ENTRY (_EEPROM);
+      ENTRY (CASE);
+      ENTRY (DEFAULT);
+      ENTRY (IF);
+      ENTRY (ELSE);
+      ENTRY (SWITCH);
+      ENTRY (WHILE);
+      ENTRY (DO);
+      ENTRY (FOR);
+      ENTRY (GOTO);
+      ENTRY (CONTINUE);
+      ENTRY (BREAK);
+      ENTRY (RETURN);
+      ENTRY (INLINEASM);
+      ENTRY (IFX);
+      ENTRY (ADDRESS_OF);
+      ENTRY (GET_VALUE_AT_ADDRESS);
+      ENTRY (SPIL);
+      ENTRY (UNSPIL);
+      ENTRY (GETHBIT);
+      ENTRY (BITWISEAND);
+      ENTRY (UNARYMINUS);
+      ENTRY (IPUSH);
+      ENTRY (IPOP);
+      ENTRY (PCALL);
+      ENTRY (ENDFUNCTION);
+      ENTRY (JUMPTABLE);
+      ENTRY (RRC);
+      ENTRY (RLC);
+      ENTRY (CAST);
+      ENTRY (CALL);
+      ENTRY (PARAM);
+      ENTRY (NULLOP);
+      ENTRY (BLOCK);
+      ENTRY (LABEL);
+      ENTRY (RECEIVE);
+      ENTRY (SEND);
+    default:
+      printf ("default: %c", ret);
+    }
+  tmp = linebuf[linepos];
+  linebuf[linepos] = '\0';
+  printf (" for %s (%u bytes)\n", linebuf + lastpos, linepos - lastpos);
+  linebuf[linepos] = tmp;
+  lastpos = linepos;
+  fflush (stdout);
+#endif
+  return ret;
+}
  
-    else if (!strcmp(sz, "_sram")) {
-        TKEYWORD(_XDATA);}
+#define TEST(_a)       (_a) ? (void)0 : printf("Test %s failed\n", #_a);
  
-    else if (!strcmp(sz, "_xdata")) {
-        TKEYWORD(_XDATA);}
+int 
+altlex_testparse (const char *input)
+{
+  /* Fiddle with the read-ahead buffer to insert ourselves */
+  strcpy (linebuf, input);
+  linelen = strlen (linebuf) + 1;
+  linepos = 0;
  
-    else if (!strcmp(sz, "_pdata")) {
-        TKEYWORD(_PDATA); }
+  return yylex ();
+}
  
-    else if (!strcmp(sz, "_idata")) {
-        TKEYWORD(_IDATA); }
+int 
+altlex_testchar (const char *input)
+{
+  value *val;
+  if (altlex_testparse (input) != CONSTANT)
+    return -2;
+  val = yylval.val;
+  if (val->type->class != SPECIFIER)
+    return -3;
+  if (SPEC_NOUN (val->type) != V_CHAR)
+    return -4;
+  if (SPEC_SCLS (val->type) != S_LITERAL)
+    return -5;
+  return SPEC_CVAL (val->type).v_int;
+}
  
-    /* check if it is in the typedef table */
-    if (findSym(TypedefTab,NULL,sz)) {
-       strcpy(yylval.yychar,sz);
-       return (TYPE_NAME) ;
-    }
-    else   {
-       strcpy (yylval.yychar,sz);
-       return(IDENTIFIER);
-    }
+int 
+altlex_testnum (const char *input)
+{
+  value *val;
+  if (altlex_testparse (input) != CONSTANT)
+    return -2;
+  val = yylval.val;
+  if (val->type->class != SPECIFIER)
+    return -3;
+  if (SPEC_NOUN (val->type) != V_INT)
+    return -4;
+  if (SPEC_SCLS (val->type) != S_LITERAL)
+    return -5;
+  if (SPEC_USIGN (val->type))
+    return SPEC_CVAL (val->type).v_uint;
+  else
+    return SPEC_CVAL (val->type).v_int;
  }
  
-int yylex(void)
+int 
+altlex_runtests (void)
  {
-    int c;
-    char line[128];
-    char *p;
-
-    c = GETC();
-    while (1) {
-       /* Handle comments first */
-       if (c == '/') {
-           int c2 = GETC();
-           if (c2 == '*' || c2 == '/') {
-               discard_comments(c2);
-               c = GETC();
-               continue;
-           }
-           else
-               UNGETC(c2);
-       }
-       switch (c) {
-       case EOF:
-           return 0;
-       case ' ':
-       case '\t':
-       case '\r':
-       case '\n':
-           /* Skip whitespace */
-           break;
-       case 'a': case 'b': case 'c': case 'd':
-       case 'e': case 'f': case 'g': case 'h':
-       case 'i': case 'j': case 'k': case 'l':
-       case 'm': case 'n': case 'o': case 'p':
-       case 'q': case 'r': case 's': case 't':
-       case 'u': case 'v': case 'w': case 'x':
-       case 'y': case 'z':
-       case 'A': case 'B': case 'C': case 'D':
-       case 'E': case 'F': case 'G': case 'H':
-       case 'I': case 'J': case 'K': case 'L':
-       case 'M': case 'N': case 'O': case 'P':
-       case 'Q': case 'R': case 'S': case 'T':
-       case 'U': case 'V': case 'W': case 'X':
-       case 'Y': case 'Z':
-       case '_':
-           /* Start of a token.  Parse. */
-           p = line;
-           *p++ = c;
-           c = GETC();
-           while (ISL(c)) {
-               *p++ = c;
-               c = GETC();
-           }
-           *p = '\0';
-           UNGETC(c);
-           return check_token(line);
-       case '0': case '1':
-       case '2': case '3': case '4': case '5':
-       case '6': case '7': case '8': case '9':
-           p = line;
-           *p++ = c;
-           c = GETC();
-           if (c == 'x' || c == 'X') {
-               *p++ = c;
-               c = GETC();
-           }
-           while (ISHEX(c)) {
-               *p++ = c;
-               c = GETC();
-           }
-           *p = '\0';
-           UNGETC(c);
-           yylval.val = constVal(line);
-           return CONSTANT;
-       case '\"':
-           /* A string */
-           p = stringLiteral();
-           yylval.val = strVal(p);
-           return(STRING_LITERAL);
-       case '\'':
-           /* ie '\n' */
-           break;
-       case '#':
-           /* Assume a pragma and toast the rest of the line. */
-           c = GETC();
-           while (c != '\n') {
-               c = GETC();
-           }
-           break;
-       case '=':
-       case '&':
-       case '!':
-       case '-':
-       case '+':
-       case '*':
-       case '/':
-       case '%':
-       case '<':
-       case '>':
-       case '^':
-       case '|':
-           /* Cases which can be compounds */
-           return c;
-       case '{':
-           NestLevel++;
-           return c;
-       case '}':
-           NestLevel--;
-           return c;
-       case '.':
-           c = GETC();
-           if (c == '.') {
-               c = GETC();
-               if (c == '.') {
-                   return VAR_ARGS;
-               }
-           }
-           UNGETC(c);
-           /* Fall through */
-       case ',':
-       case ':':
-       case '(': case ')':
-       case '[': case ']':
-       case '~':
-       case '?':
-           /* Special characters that cant be part of a composite */
-           return c;
-       default:
-           printf("Unhandled char %c\n", c);
-       }
-       c = GETC();
-    }
-    return 0;
+  /* These conditions are ripped directly from SDCC.lex */
+  /* First check the parsing of the basic tokens */
+  TEST (altlex_testparse (">>=") == RIGHT_ASSIGN);
+  TEST (altlex_testparse ("<<=") == LEFT_ASSIGN);
+  TEST (altlex_testparse ("+=") == ADD_ASSIGN);
+  TEST (altlex_testparse ("-=") == SUB_ASSIGN);
+  TEST (altlex_testparse ("*=") == MUL_ASSIGN);
+  TEST (altlex_testparse ("/=") == DIV_ASSIGN);
+  TEST (altlex_testparse ("%=") == MOD_ASSIGN);
+  TEST (altlex_testparse ("&=") == AND_ASSIGN);
+  TEST (altlex_testparse ("^=") == XOR_ASSIGN);
+  TEST (altlex_testparse ("|=") == OR_ASSIGN);
+  TEST (altlex_testparse (">>") == RIGHT_OP);
+  TEST (altlex_testparse ("<<") == LEFT_OP);
+  TEST (altlex_testparse ("++") == INC_OP);
+  TEST (altlex_testparse ("--") == DEC_OP);
+  TEST (altlex_testparse ("->") == PTR_OP);
+  TEST (altlex_testparse ("&&") == AND_OP);
+  TEST (altlex_testparse ("||") == OR_OP);
+  TEST (altlex_testparse ("<=") == LE_OP);
+  TEST (altlex_testparse (">=") == GE_OP);
+  TEST (altlex_testparse ("==") == EQ_OP);
+  TEST (altlex_testparse ("!=") == NE_OP);
+  TEST (altlex_testparse (";") == ';');
+  TEST (altlex_testparse ("{") == '{');
+  TEST (altlex_testparse ("}") == '}');
+  TEST (altlex_testparse (",") == ',');
+  TEST (altlex_testparse (":") == ':');
+  TEST (altlex_testparse ("=") == '=');
+  TEST (altlex_testparse ("(") == '(');
+  TEST (altlex_testparse (")") == ')');
+  TEST (altlex_testparse ("[") == '[');
+  TEST (altlex_testparse ("]") == ']');
+  TEST (altlex_testparse (".") == '.');
+  TEST (altlex_testparse ("&") == '&');
+  TEST (altlex_testparse ("!") == '!');
+  TEST (altlex_testparse ("~") == '~');
+  TEST (altlex_testparse ("-") == '-');
+  TEST (altlex_testparse ("+") == '+');
+  TEST (altlex_testparse ("*") == '*');
+  TEST (altlex_testparse ("/") == '/');
+  TEST (altlex_testparse ("%") == '%');
+  TEST (altlex_testparse ("<") == '<');
+  TEST (altlex_testparse (">") == '>');
+  TEST (altlex_testparse ("^") == '^');
+  TEST (altlex_testparse ("|") == '|');
+  TEST (altlex_testparse ("?") == '?');
+
+  /* Now some character constants */
+  TEST (altlex_testchar ("'1'") == '1');
+  TEST (altlex_testchar ("'a'") == 'a');
+  TEST (altlex_testchar ("'A'") == 'A');
+  TEST (altlex_testchar ("'z'") == 'z');
+  TEST (altlex_testchar ("'Z'") == 'Z');
+  TEST (altlex_testchar ("'\n'") == '\n');
+  TEST (altlex_testchar ("'\\\\'") == '\\');
+  TEST (altlex_testchar ("'\\''") == '\'');
+
+  /* And some numbers */
+  TEST (altlex_testnum ("0") == 0);
+  TEST (altlex_testnum ("1") == 1);
+  TEST (altlex_testnum ("075") == 075);
+  TEST (altlex_testnum ("0xfeed") == 0xfeed);
+  TEST (altlex_testnum ("0xFEED") == 0xFEED);
+  TEST (altlex_testnum ("0x00005678") == 0x5678);
+
+  /* Keywords */
+  TEST (altlex_testparse ("auto") == AUTO);
+  TEST (altlex_testparse ("break") == BREAK);
+  TEST (altlex_testparse ("case") == CASE);
+  TEST (altlex_testparse ("char") == CHAR);
+  TEST (altlex_testparse ("const") == CONST);
+  TEST (altlex_testparse ("continue") == CONTINUE);
+  TEST (altlex_testparse ("default") == DEFAULT);
+  TEST (altlex_testparse ("do") == DO);
+  /* Prints a warning */
+  //    TEST(altlex_testparse("double") == FLOAT);
+  TEST (altlex_testparse ("else") == ELSE);
+  TEST (altlex_testparse ("enum") == ENUM);
+  TEST (altlex_testparse ("extern") == EXTERN);
+  TEST (altlex_testparse ("float") == FLOAT);
+  TEST (altlex_testparse ("for") == FOR);
+  TEST (altlex_testparse ("goto") == GOTO);
+  TEST (altlex_testparse ("if") == IF);
+  TEST (altlex_testparse ("int") == INT);
+  TEST (altlex_testparse ("interrupt") == INTERRUPT);
+  TEST (altlex_testparse ("long") == LONG);
+  TEST (altlex_testparse ("register") == REGISTER);
+  TEST (altlex_testparse ("return") == RETURN);
+  TEST (altlex_testparse ("short") == SHORT);
+  TEST (altlex_testparse ("signed") == SIGNED);
+  TEST (altlex_testparse ("sizeof") == SIZEOF);
+  TEST (altlex_testparse ("static") == STATIC);
+  TEST (altlex_testparse ("struct") == STRUCT);
+  TEST (altlex_testparse ("switch") == SWITCH);
+  TEST (altlex_testparse ("typedef") == TYPEDEF);
+  TEST (altlex_testparse ("union") == UNION);
+  TEST (altlex_testparse ("unsigned") == UNSIGNED);
+  TEST (altlex_testparse ("void") == VOID);
+  TEST (altlex_testparse ("volatile") == VOLATILE);
+  TEST (altlex_testparse ("while") == WHILE);
+  TEST (altlex_testparse ("...") == VAR_ARGS);
+
+#if 0
+  /* Platform specific keywords */
+  TEST (altlex_testparse ("sram") ==)
+  {
+    count ();
+    TKEYWORD (XDATA);
+  }
+  TEST (altlex_testparse ("using") ==)
+  {
+    count ();
+    TKEYWORD (USING);
+  }
+  TEST (altlex_testparse ("near") ==)
+  {
+    count ();
+    TKEYWORD (DATA);
+  }
+  TEST (altlex_testparse ("at") ==)
+  {
+    count ();
+    TKEYWORD (AT);
+  }
+  TEST (altlex_testparse ("bit") ==)
+  {
+    count ();
+    TKEYWORD (BIT);
+  }
+  TEST (altlex_testparse ("code") ==)
+  {
+    count ();
+    TKEYWORD (CODE);
+  }
+  TEST (altlex_testparse ("critical") ==)
+  {
+    count ();
+    TKEYWORD (CRITICAL);
+  }
+  TEST (altlex_testparse ("data") ==)
+  {
+    count ();
+    TKEYWORD (DATA);
+  }
+  TEST (altlex_testparse ("far") ==)
+  {
+    count ();
+    TKEYWORD (XDATA);
+  }
+  TEST (altlex_testparse ("eeprom") ==)
+  {
+    count ();
+    TKEYWORD (EEPROM);
+  }
+  TEST (altlex_testparse ("flash") ==)
+  {
+    count ();
+    TKEYWORD (CODE);
+  }
+  TEST (altlex_testparse ("idata") ==)
+  {
+    count ();
+    TKEYWORD (IDATA);
+  }
+  TEST (altlex_testparse ("nonbanked") ==)
+  {
+    count ();
+    TKEYWORD (NONBANKED);
+  }
+  TEST (altlex_testparse ("banked") ==)
+  {
+    count ();
+    TKEYWORD (BANKED);
+  }
+  TEST (altlex_testparse ("pdata") ==)
+  {
+    count ();
+    TKEYWORD (PDATA);
+  }
+  TEST (altlex_testparse ("reentrant") ==)
+  {
+    count ();
+    TKEYWORD (REENTRANT);
+  }
+  TEST (altlex_testparse ("sfr") ==)
+  {
+    count ();
+    TKEYWORD (SFR);
+  }
+  TEST (altlex_testparse ("sbit") ==)
+  {
+    count ();
+    TKEYWORD (SBIT);
+  }
+  TEST (altlex_testparse ("xdata") ==)
+  {
+    count ();
+    TKEYWORD (XDATA);
+  }
+  TEST (altlex_testparse ("_data") ==)
+  {
+    count ();
+    TKEYWORD (_NEAR);
+  }
+  TEST (altlex_testparse ("_code") ==)
+  {
+    count ();
+    TKEYWORD (_CODE);
+  }
+  TEST (altlex_testparse ("_eeprom") ==)
+  {
+    count ();
+    TKEYWORD (_EEPROM);
+  }
+  TEST (altlex_testparse ("_flash") ==)
+  {
+    count ();
+    TKEYWORD (_CODE);
+  }
+  TEST (altlex_testparse ("_generic") ==)
+  {
+    count ();
+    TKEYWORD (_GENERIC);
+  }
+  TEST (altlex_testparse ("_near") ==)
+  {
+    count ();
+    TKEYWORD (_NEAR);
+  }
+  TEST (altlex_testparse ("_sram") ==)
+  {
+    count ();
+    TKEYWORD (_XDATA);
+  }
+  TEST (altlex_testparse ("_xdata") ==)
+  {
+    count ();
+    TKEYWORD (_XDATA);
+  }
+  TEST (altlex_testparse ("_pdata") ==)
+  {
+    count ();
+    TKEYWORD (_PDATA);
+  }
+  TEST (altlex_testparse ("_idata") ==)
+  {
+    count ();
+    TKEYWORD (_IDATA);
+  }
+#endif
+
+  return 0;
  }