2 An alternate lexer to SDCC.lex.
3 In development - i.e. messy and just plain wrong.
4 Inspired by the gcc lexer, c-lex.c.
12 /* Right. What are the parts of the C stream? From SDCC.lex:
14 L = [a..z A..Z _] alphanumerics and _
15 H = [a..f A..F 0-9] Hex digits
16 E = [eE+-0-9] Digits in a float
17 FS = [fFlL] Specifiers for a float
18 IS = [uUlL] Specifiers for a int
20 L[LD]* A 'token' - can't think of a good name
21 Check tokens against the reserved words.
25 If in the typedef table, do stuff...
26 Blah. See check_type()
27 0[xX]{H}+ Hex number - PENDING: specifiers
28 0{D}+ Octal number - PENDING: specifiers
29 {D}+ Decimal - PENDING: specifiers
33 Comment start Strip until end of comment.
40 D Try to read a number
41 Punct Try to read punct
44 extern int fatalError;
46 extern char *filename;
54 static char linebuf[10000];
55 static int linepos, linelen;
56 static int end_of_file;
/* Output stream used by the diagnostic fprintf/vfprintf calls in this file. */
64 #define ERRSINK stderr
67 error (const char *sz,...)
72 if (filename && lineno)
74 fprintf (ERRSINK, "%s(%d):", filename, lineno);
76 fprintf (ERRSINK, "error *** ");
78 vfprintf (ERRSINK, sz, ap);
80 fprintf (ERRSINK, "\n");
87 linelen = fread (linebuf, 1, sizeof (linebuf), yyin);
91 return linebuf[linepos++];
97 if (linepos < linelen)
98 return linebuf[linepos++];
106 linebuf[--linepos] = c;
/* Character input goes through these two macros, which currently forward
   to ygetc()/yungetc().  The commented-out pair below is an alternative
   that reads from, and pushes back onto, yyin directly. */
110 #define GETC() ygetc()
111 #define UNGETC(_a) yungetc(_a)
113 //#define GETC() fgetc(yyin);
114 //#define UNGETC(_a) ungetc(_a, yyin)
/* Character-class helpers used while scanning.
     ISL      non-zero for a letter, a digit or '_'
     ISALNUM  non-zero for a letter or a digit (isalnum)
     ISHEX    non-zero for a hexadecimal digit (isxdigit)
   ISL evaluates its argument twice, so the argument must not have side
   effects.  The argument is parenthesised in the comparison so that an
   expression such as `c | 0' is compared as a whole.
   NOTE(review): the <ctype.h> functions expect a value representable as
   unsigned char (or EOF) - confirm callers never pass a negative char. */
#define ISL(_a) (isalnum(_a) || (_a) == '_')
#define ISALNUM(_a) isalnum(_a)
#define ISHEX(_a) isxdigit(_a)
122 static char line[1000];
127 /* put into the buffer till we hit the */
134 break; /* end of input */
135 /* if it is a \ then everything allowed */
138 *str++ = ch; /* backslash in place */
139 *str++ = GETC (); /* following char in place */
140 continue; /* carry on */
143 /* if new line we have a new line break */
147 /* if this is a quote then we have work to do */
148 /* find the next non whitespace character */
149 /* if that is a double quote then carry on */
153 while ((ch = GETC ()) && isspace (ch));
172 discard_comments (int type)
191 else if (type == '/')
197 while (c != '\n' && c != EOF);
205 /* will return 1 if the string is a part
206 of a target specific keyword */
208 isTargetKeyword (const char *s)
212 if (port->keywords == NULL)
214 for (i = 0; port->keywords[i]; i++)
216 if (strcmp (port->keywords[i], s) == 0)
224 check_token (const char *sz)
226 const struct reserved_words *p;
227 p = is_reserved_word (sz, strlen (sz));
230 if (!p->is_special || isTargetKeyword (sz))
234 /* check if it is in the typedef table */
235 if (findSym (TypedefTab, NULL, sz))
237 strcpy (yylval.yychar, sz);
242 strcpy (yylval.yychar, sz);
254 while (c == '\t' || c == ' ')
264 error ("Missing argument to pragma");
267 /* First give the port a chance */
268 if (port->process_pragma && !port->process_pragma (line))
270 /* PENDING: all the SDCC shared pragmas */
271 /* Nothing handled it */
272 error ("Unrecognised #pragma %s", line);
283 while (c == '\t' || c == ' ')
293 error ("Error in number in #line");
294 /* This is weird but cpp seems to add an extra three to the line no */
295 yylineno = atoi (line) - 3;
297 /* Fetch the filename if there is one */
298 while (c == '\t' || c == ' ')
304 while (c != '\"' && c != EOF && c != '\n')
312 currFname = gc_strdup (line);
314 filename = currFname;
319 invalid_directive (void)
321 error ("Invalid directive");
331 /* Skip any leading white space */
333 while (c == '\t' || c == ' ')
335 /* We're only interested in #something */
339 while (c == '\t' || c == ' ')
341 /* The text in the stream is the type of directive */
346 if (GETC () == 'i' && GETC () == 'n' && GETC () == 'e')
349 if (c == '\t' || c == ' ')
352 invalid_directive ();
355 invalid_directive ();
358 /* Start of pragma? */
359 if (GETC () == 'r' && GETC () == 'a' && GETC () == 'g' &&
360 GETC () == 'm' && GETC () == 'a')
363 if (c == '\t' || c == ' ')
366 invalid_directive ();
369 invalid_directive ();
372 invalid_directive ();
374 /* Discard from here until the start of the next line */
375 while (c != '\n' && c != EOF)
381 skip_whitespace (int c)
396 c = check_newline ();
404 yyerror (const char *s)
407 error ("%s at end of of input", s);
408 else if (yytext[0] == '\0')
409 error ("%s at null character", s);
410 else if (yytext[0] == '"')
411 error ("%s before string constant", s);
412 else if (yytext[0] == '\'')
413 error ("%s before character constant", s);
415 error ("%s before %s", s, yytext);
422 static char line[128];
437 /* Skip whitespace */
442 c = skip_whitespace (c);
446 c = check_newline ();
454 /* Handle comments first */
458 if (c2 == '*' || c2 == '/')
460 discard_comments (c2);
525 /* Start of a token. Parse. */
536 return check_token (line);
550 if (c == 'x' || c == 'X')
560 if (c == 'U' || c == 'u' || c == 'L' || c == 'l')
565 if (c == 'U' || c == 'u' || c == 'L' || c == 'l')
572 yylval.val = constVal (line);
576 p = stringLiteral ();
577 yylval.val = strVal (p);
578 return (STRING_LITERAL);
581 ['\n', '\\', '\'', '\"'...]
599 error ("Unrecognised character constant %s", line);
601 yylval.val = charVal (line);
616 /* Cases which can be compounds */
617 /* The types and classes of composites are:
619 += -= *= /= %= &= ^= |=
624 So a composite started by char 'x' can be:
625 1. Followed by itself then an equals
626 2. Followed by itself
627 3. Followed by an equals
643 yylval.yyint = RIGHT_ASSIGN;
647 yylval.yyint = LEFT_ASSIGN;
650 error ("Unrecognised token %c%c=", c, c);
655 /* Push the next char back on and find the class */
677 error ("Unrecognised token %c%c", c, c);
682 else if (next == '=')
721 error ("Unrecognised token %c=", c);
725 yylval.yyint = result;
730 else if (c == '-' && next == '>')
770 /* Special characters that can't be part of a composite */
773 error ("Unhandled character %c", c);
/* Expands to a switch case that prints the spelling of token _a and then
   leaves the switch.  The spelling is passed as a "%s" argument rather
   than used as the format itself, so a '%' in it can never be read as a
   conversion specification. */
#define ENTRY(_a) case (_a): printf("%s", #_a); break;
785 static int lastpos = 0;
788 printf ("Returning ");
795 ENTRY (STRING_LITERAL);
814 ENTRY (RIGHT_ASSIGN);
879 ENTRY (GET_VALUE_AT_ADDRESS);
901 printf ("default: %c", ret);
903 tmp = linebuf[linepos];
904 linebuf[linepos] = '\0';
905 printf (" for %s (%u bytes)\n", linebuf + lastpos, linepos - lastpos);
906 linebuf[linepos] = tmp;
/* Evaluates _a once and prints "Test <expression> failed" when it is
   false.  Both arms of the conditional are void, and the expansion is a
   single parenthesised expression with no trailing semicolon, so that
   `TEST (x);' is exactly one statement - including when it is the body
   of an if/else. */
#define TEST(_a) ((_a) ? (void)0 : (void)printf("Test %s failed\n", #_a))
916 altlex_testparse (const char *input)
918 /* Fiddle with the read-ahead buffer to insert ourselves */
919 strcpy (linebuf, input);
920 linelen = strlen (linebuf) + 1;
927 altlex_testchar (const char *input)
930 if (altlex_testparse (input) != CONSTANT)
933 if (val->type->class != SPECIFIER)
935 if (SPEC_NOUN (val->type) != V_CHAR)
937 if (SPEC_SCLS (val->type) != S_LITERAL)
939 return SPEC_CVAL (val->type).v_int;
943 altlex_testnum (const char *input)
946 if (altlex_testparse (input) != CONSTANT)
949 if (val->type->class != SPECIFIER)
951 if (SPEC_NOUN (val->type) != V_INT)
953 if (SPEC_SCLS (val->type) != S_LITERAL)
955 if (SPEC_USIGN (val->type))
956 return SPEC_CVAL (val->type).v_uint;
958 return SPEC_CVAL (val->type).v_int;
962 altlex_runtests (void)
964 /* These conditions are ripped directly from SDCC.lex */
965 /* First check the parsing of the basic tokens */
966 TEST (altlex_testparse (">>=") == RIGHT_ASSIGN);
967 TEST (altlex_testparse ("<<=") == LEFT_ASSIGN);
968 TEST (altlex_testparse ("+=") == ADD_ASSIGN);
969 TEST (altlex_testparse ("-=") == SUB_ASSIGN);
970 TEST (altlex_testparse ("*=") == MUL_ASSIGN);
971 TEST (altlex_testparse ("/=") == DIV_ASSIGN);
972 TEST (altlex_testparse ("%=") == MOD_ASSIGN);
973 TEST (altlex_testparse ("&=") == AND_ASSIGN);
974 TEST (altlex_testparse ("^=") == XOR_ASSIGN);
975 TEST (altlex_testparse ("|=") == OR_ASSIGN);
976 TEST (altlex_testparse (">>") == RIGHT_OP);
977 TEST (altlex_testparse ("<<") == LEFT_OP);
978 TEST (altlex_testparse ("++") == INC_OP);
979 TEST (altlex_testparse ("--") == DEC_OP);
980 TEST (altlex_testparse ("->") == PTR_OP);
981 TEST (altlex_testparse ("&&") == AND_OP);
982 TEST (altlex_testparse ("||") == OR_OP);
983 TEST (altlex_testparse ("<=") == LE_OP);
984 TEST (altlex_testparse (">=") == GE_OP);
985 TEST (altlex_testparse ("==") == EQ_OP);
986 TEST (altlex_testparse ("!=") == NE_OP);
987 TEST (altlex_testparse (";") == ';');
988 TEST (altlex_testparse ("{") == '{');
989 TEST (altlex_testparse ("}") == '}');
990 TEST (altlex_testparse (",") == ',');
991 TEST (altlex_testparse (":") == ':');
992 TEST (altlex_testparse ("=") == '=');
993 TEST (altlex_testparse ("(") == '(');
994 TEST (altlex_testparse (")") == ')');
995 TEST (altlex_testparse ("[") == '[');
996 TEST (altlex_testparse ("]") == ']');
997 TEST (altlex_testparse (".") == '.');
998 TEST (altlex_testparse ("&") == '&');
999 TEST (altlex_testparse ("!") == '!');
1000 TEST (altlex_testparse ("~") == '~');
1001 TEST (altlex_testparse ("-") == '-');
1002 TEST (altlex_testparse ("+") == '+');
1003 TEST (altlex_testparse ("*") == '*');
1004 TEST (altlex_testparse ("/") == '/');
1005 TEST (altlex_testparse ("%") == '%');
1006 TEST (altlex_testparse ("<") == '<');
1007 TEST (altlex_testparse (">") == '>');
1008 TEST (altlex_testparse ("^") == '^');
1009 TEST (altlex_testparse ("|") == '|');
1010 TEST (altlex_testparse ("?") == '?');
1012 /* Now some character constants */
1013 TEST (altlex_testchar ("'1'") == '1');
1014 TEST (altlex_testchar ("'a'") == 'a');
1015 TEST (altlex_testchar ("'A'") == 'A');
1016 TEST (altlex_testchar ("'z'") == 'z');
1017 TEST (altlex_testchar ("'Z'") == 'Z');
1018 TEST (altlex_testchar ("'\n'") == '\n');
1019 TEST (altlex_testchar ("'\\\\'") == '\\');
1020 TEST (altlex_testchar ("'\\''") == '\'');
1022 /* And some numbers */
1023 TEST (altlex_testnum ("0") == 0);
1024 TEST (altlex_testnum ("1") == 1);
1025 TEST (altlex_testnum ("075") == 075);
1026 TEST (altlex_testnum ("0xfeed") == 0xfeed);
1027 TEST (altlex_testnum ("0xFEED") == 0xFEED);
1028 TEST (altlex_testnum ("0x00005678") == 0x5678);
1031 TEST (altlex_testparse ("auto") == AUTO);
1032 TEST (altlex_testparse ("break") == BREAK);
1033 TEST (altlex_testparse ("case") == CASE);
1034 TEST (altlex_testparse ("char") == CHAR);
1035 TEST (altlex_testparse ("const") == CONST);
1036 TEST (altlex_testparse ("continue") == CONTINUE);
1037 TEST (altlex_testparse ("default") == DEFAULT);
1038 TEST (altlex_testparse ("do") == DO);
1039 /* Prints a warning */
1040 // TEST(altlex_testparse("double") == FLOAT);
1041 TEST (altlex_testparse ("else") == ELSE);
1042 TEST (altlex_testparse ("enum") == ENUM);
1043 TEST (altlex_testparse ("extern") == EXTERN);
1044 TEST (altlex_testparse ("float") == FLOAT);
1045 TEST (altlex_testparse ("for") == FOR);
1046 TEST (altlex_testparse ("goto") == GOTO);
1047 TEST (altlex_testparse ("if") == IF);
1048 TEST (altlex_testparse ("int") == INT);
1049 TEST (altlex_testparse ("interrupt") == INTERRUPT);
1050 TEST (altlex_testparse ("long") == LONG);
1051 TEST (altlex_testparse ("register") == REGISTER);
1052 TEST (altlex_testparse ("return") == RETURN);
1053 TEST (altlex_testparse ("short") == SHORT);
1054 TEST (altlex_testparse ("signed") == SIGNED);
1055 TEST (altlex_testparse ("sizeof") == SIZEOF);
1056 TEST (altlex_testparse ("static") == STATIC);
1057 TEST (altlex_testparse ("struct") == STRUCT);
1058 TEST (altlex_testparse ("switch") == SWITCH);
1059 TEST (altlex_testparse ("typedef") == TYPEDEF);
1060 TEST (altlex_testparse ("union") == UNION);
1061 TEST (altlex_testparse ("unsigned") == UNSIGNED);
1062 TEST (altlex_testparse ("void") == VOID);
1063 TEST (altlex_testparse ("volatile") == VOLATILE);
1064 TEST (altlex_testparse ("while") == WHILE);
1065 TEST (altlex_testparse ("...") == VAR_ARGS);
1068 /* Platform specific keywords */
1069 TEST (altlex_testparse ("sram") ==)
1074 TEST (altlex_testparse ("using") ==)
1079 TEST (altlex_testparse ("near") ==)
1084 TEST (altlex_testparse ("at") ==)
1089 TEST (altlex_testparse ("bit") ==)
1094 TEST (altlex_testparse ("code") ==)
1099 TEST (altlex_testparse ("critical") ==)
1102 TKEYWORD (CRITICAL);
1104 TEST (altlex_testparse ("data") ==)
1109 TEST (altlex_testparse ("far") ==)
1114 TEST (altlex_testparse ("eeprom") ==)
1119 TEST (altlex_testparse ("flash") ==)
1124 TEST (altlex_testparse ("idata") ==)
1129 TEST (altlex_testparse ("nonbanked") ==)
1132 TKEYWORD (NONBANKED);
1134 TEST (altlex_testparse ("banked") ==)
1139 TEST (altlex_testparse ("pdata") ==)
1144 TEST (altlex_testparse ("reentrant") ==)
1147 TKEYWORD (REENTRANT);
1149 TEST (altlex_testparse ("sfr") ==)
1154 TEST (altlex_testparse ("sbit") ==)
1159 TEST (altlex_testparse ("xdata") ==)
1164 TEST (altlex_testparse ("_data") ==)
1169 TEST (altlex_testparse ("_code") ==)
1174 TEST (altlex_testparse ("_eeprom") ==)
1179 TEST (altlex_testparse ("_flash") ==)
1184 TEST (altlex_testparse ("_generic") ==)
1187 TKEYWORD (_GENERIC);
1189 TEST (altlex_testparse ("_near") ==)
1194 TEST (altlex_testparse ("_sram") ==)
1199 TEST (altlex_testparse ("_xdata") ==)
1204 TEST (altlex_testparse ("_pdata") ==)
1209 TEST (altlex_testparse ("_idata") ==)