2 An alternate lexer to SDCC.lex.
3 In development - i.e. messy and just plain wrong.
4 Inspired by the gcc lexer, c-lex.c.
12 /* Right. What are the parts of the C stream? From SDCC.lex:
14 L = [a..z A..Z _] letters and _
15 H = [a..f A..F 0-9] Hex digits
16 E = [eE+-0-9] Digits in a float
17 FS = [fFlL] Specifiers for a float
18 IS = [uUlL] Specifiers for a int
20 L[LD]* A 'token' - can't think of a good name
21 Check tokens against the reserved words.
25 If in the typedef table, do stuff...
26 Blah. See check_type()
27 0[xX]{H}+ Hex number - PENDING: specifiers
28 0{D}+ Octal number - PENDING: specifiers
29 {D}+ Decimal - PENDING: specifiers
33 Comment start Strip until end of comment.
40 D Try to read a number
41 Punct Try to read punct
/* Declared here, defined elsewhere. filename is read by error() when it
   builds the location prefix of a diagnostic. */
44 extern int fatalError;
46 extern char *filename;
/* Read-ahead buffer. underflow() fills it with fread() and ygetc() and
   yungetc() step linepos through it. */
54 static char linebuf[10000];
/* linepos indexes the next character to hand out; linelen receives the
   byte count of the most recent fread() into linebuf. */
55 static int linepos, linelen;
/* NOTE(review): presumably set once the input is exhausted. The code that
   writes it is not visible here - confirm. */
56 static int end_of_file;
/* Stream that receives every diagnostic printed by error(). */
64 #define ERRSINK stderr
/* Print a printf-style diagnostic on ERRSINK.
   sz is the format string and the variadic arguments are forwarded to
   vfprintf(). When both filename and lineno are set, the message is
   preceded by a file(line): location prefix. The fixed error tag, the
   formatted text and a newline are then written in that order.
   NOTE(review): the va_list setup and teardown are not visible here. */
66 static void error(const char *sz, ...)
71 if (filename && lineno) {
72 fprintf(ERRSINK, "%s(%d):",filename,lineno);
74 fprintf(ERRSINK, "error *** ");
76 vfprintf(ERRSINK, sz, ap);
78 fprintf(ERRSINK, "\n");
/* Refill linebuf from yyin and return the character at linepos,
   advancing linepos past it.
   NOTE(review): fread() returns size_t while linelen is an int, and the
   steps between the read and the return (resetting linepos, handling a
   zero-length read at end of input) are not visible here - confirm. */
82 static int underflow(void)
84 linelen = fread(linebuf, 1, sizeof(linebuf), yyin);
88 return linebuf[linepos++];
/* Return the next input character. While unread bytes remain
   (linepos < linelen) it is served directly from linebuf.
   NOTE(review): the path taken once the buffer is used up is not visible
   here - presumably a call to underflow(); confirm. */
91 static int INLINE ygetc(void)
93 if (linepos < linelen)
94 return linebuf[linepos++];
/* Push c back onto the input: linepos steps back one slot and c is
   stored there, so the next ygetc() returns it again.
   NOTE(review): no guard against linepos already being 0 is visible, and
   the return value is not visible either. */
99 static int INLINE yungetc(int c)
101 linebuf[--linepos] = c;
/* Character-level input primitives used by the lexer. They map onto the
   buffered ygetc() and yungetc() helpers. */
#define GETC() ygetc()
#define UNGETC(_a) yungetc(_a)
/* Unbuffered stdio alternative, kept for reference: */
//#define GETC() fgetc(yyin);
//#define UNGETC(_a) ungetc(_a, yyin)
/* Character classes.
   ISL     - alphanumeric or underscore.
   ISALNUM - alphanumeric.
   ISHEX   - hexadecimal digit.
   Every use of the argument is parenthesised so that an expression
   argument (for example a conditional) is classified as a whole instead
   of being split by operator precedence. ISL evaluates its argument
   twice, so do not pass an expression with side effects. */
#define ISL(_a) (isalnum(_a) || (_a) == '_')
#define ISALNUM(_a) isalnum(_a)
#define ISHEX(_a) isxdigit(_a)
/* Collect the text of a string literal from the input.
   NOTE(review): only fragments of the body are visible here. The notes
   below describe just those fragments; the return value is presumably
   the static buffer - confirm. */
114 static char *stringLiteral (void)
/* Fixed-size static storage for the collected text.
   NOTE(review): no bounds check on writes into it is visible. */
116 static char line[1000];
121 /* put into the buffer till we hit the */
126 if (!ch) break ; /* end of input */
127 /* if it is a \ then everything allowed */
/* The backslash and the character after it are both stored unchanged,
   and the continue skips the remaining checks for that pair. */
129 *str++ = ch ; /* backslash in place */
130 *str++ = GETC() ; /* following char in place */
131 continue ; /* carry on */
134 /* if new line we have a new line break */
135 if (ch == '\n') break ;
137 /* if this is a quote then we have work to do */
138 /* find the next non whitespace character */
139 /* if that is a double quote then carry on */
/* Skip whitespace; the loop also stops when GETC() yields 0. */
142 while ((ch = GETC()) && isspace(ch)) ;
/* Skip over a comment. type is the character that followed the opening
   slash; the caller passes either a star or a second slash.
   NOTE(review): the branch that precedes the else-if below is not visible
   here - presumably it handles the star (block comment) form. */
158 static void discard_comments(int type)
/* Slash form: keep consuming until a newline or EOF has been read. */
173 else if (type == '/') {
176 } while (c != '\n' && c != EOF);
183 /* will return 1 if the string s is one of the target specific
184 keywords listed in port->keywords (exact strcmp match) */
/* A NULL port->keywords is tested first, before any scanning. Otherwise
   the array is scanned up to its terminating NULL entry.
   NOTE(review): the return statements are not visible here. */
185 static INLINE int isTargetKeyword(const char *s)
189 if (port->keywords == NULL)
191 for ( i = 0 ; port->keywords[i] ; i++ ) {
192 if (strcmp(port->keywords[i],s) == 0)
/* Classify the identifier-like token sz.
   sz is first looked up with is_reserved_word(). A reserved word whose
   is_special flag is set is only accepted when isTargetKeyword(sz) also
   matches. Otherwise sz is copied into yylval.yychar, after a lookup in
   the typedef table.
   NOTE(review): the token codes returned on each path are not visible
   here, and neither strcpy() is bounded by the size of yylval.yychar. */
199 static INLINE int check_token(const char *sz)
201 const struct reserved_words *p;
202 p = is_reserved_word(sz, strlen(sz));
204 if (!p->is_special || isTargetKeyword(sz))
208 /* check if it is in the typedef table */
209 if (findSym(TypedefTab,NULL,sz)) {
210 strcpy(yylval.yychar,sz);
214 strcpy (yylval.yychar,sz);
/* Process the text that follows a #pragma directive.
   NOTE(review): only fragments of the body are visible here; the buffer
   named line and the variable c are declared on lines not shown. */
219 static void handle_pragma(void)
/* Skip tabs and spaces in front of the pragma argument. */
225 while (c == '\t' || c == ' ')
/* Consume characters up to the next whitespace character. */
228 while (!isspace(c)) {
234 error("Missing argument to pragma");
236 /* First give the port a chance */
/* The hook is optional (may be NULL). A zero result from it satisfies
   this test - presumably meaning the port handled the pragma; confirm. */
237 if (port->process_pragma && !port->process_pragma(line))
239 /* PENDING: all the SDCC shared pragmas */
240 /* Nothing handled it */
241 error("Unrecognised #pragma %s", line);
/* Process a #line directive: update yylineno from the number and, if a
   file name follows, point currFname and filename at a copy of it.
   NOTE(review): only fragments of the body are visible here. */
245 static void handle_line(void)
/* Skip tabs and spaces in front of the line number. */
251 while (c == '\t' || c == ' ')
260 error("Error in number in #line");
261 /* This is weird but cpp seems to add an extra three to the line no */
/* NOTE(review): atoi() gives no error indication for malformed input;
   the check that leads to the error() call above is not visible here. */
262 yylineno = atoi(line) - 3;
264 /* Fetch the filename if there is one */
265 while (c == '\t' || c == ' ')
/* The name runs up to a double quote, a newline or EOF. */
270 while (c != '\"' && c != EOF && c != '\n') {
/* The copy made by gc_strdup() becomes both currFname and filename. */
276 currFname = gc_strdup(line);
278 filename = currFname;
/* Report a preprocessor directive that the lexer does not recognise,
   using error(). */
282 static INLINE void invalid_directive(void)
284 error("Invalid directive");
/* Examine the start of a line for a directive. The visible fragments
   recognise the letters of line and of pragma one character at a time.
   NOTE(review): the dispatch to the directive handlers and the return
   value are on lines not visible here. */
287 static INLINE int check_newline(void)
293 /* Skip any leading white space */
295 while (c == '\t' || c == ' ')
297 /* We are only interested in #something */
301 while (c == '\t' || c == ' ')
303 /* The text in the stream is the type of directive */
/* The && chain stops reading at the first mismatching character, so
   fewer than three characters may be consumed when it fails. */
307 if (GETC() == 'i' && GETC() == 'n' && GETC() == 'e') {
/* The directive name must be followed by a tab or a space. */
309 if (c == '\t' || c == ' ')
318 /* Start of pragma? */
319 if (GETC() == 'r' && GETC() == 'a' && GETC() == 'g' &&
320 GETC() == 'm' && GETC() == 'a') {
322 if (c == '\t' || c == ' ')
333 /* Discard from here until the start of the next line */
334 while (c != '\n' && c != EOF)
/* Whitespace skipper used by _yylex(), which passes in the current
   character and continues with the returned one.
   NOTE(review): the body is not visible here, so which characters count
   as whitespace cannot be confirmed. */
339 static int skip_whitespace(int c)
/* Parser error callback. Reports the message s through error(), adding
   a description of where the problem was seen based on the first
   character of yytext.
   NOTE(review): the condition guarding the first message is not visible
   here, and that message contains a doubled word (of of). */
359 void yyerror(const char *s)
362 error("%s at end of of input", s);
/* An empty yytext is reported as a null character. */
363 else if (yytext[0] == '\0')
364 error("%s at null character", s);
/* A leading double quote means the token is a string constant. */
365 else if (yytext[0] == '"')
366 error("%s before string constant", s);
/* A leading single quote means the token is a character constant. */
367 else if (yytext[0] == '\'')
368 error("%s before character constant", s);
/* Anything else is echoed as is. */
370 error("%s before %s", s, yytext);
/* Core tokenizer. Skips whitespace and comments, then switches on the
   current character to recognise identifiers and keywords, numeric,
   string and character constants, and operators made of one to three
   characters.
   NOTE(review): only fragments of the body are visible here; the notes
   below describe just those fragments. */
373 static int _yylex(void)
/* Static scratch buffer for the text of the current token.
   NOTE(review): no bounds check on writes into it is visible. */
376 static char line[128];
389 /* Skip whitespace */
394 c = skip_whitespace(c);
406 /* Handle comments first */
/* c2 selects the comment form and is handed to discard_comments(). */
409 if (c2 == '*' || c2 == '/') {
410 discard_comments(c2);
/* Letters start an identifier or keyword. */
421 case 'a': case 'b': case 'c': case 'd':
422 case 'e': case 'f': case 'g': case 'h':
423 case 'i': case 'j': case 'k': case 'l':
424 case 'm': case 'n': case 'o': case 'p':
425 case 'q': case 'r': case 's': case 't':
426 case 'u': case 'v': case 'w': case 'x':
428 case 'A': case 'B': case 'C': case 'D':
429 case 'E': case 'F': case 'G': case 'H':
430 case 'I': case 'J': case 'K': case 'L':
431 case 'M': case 'N': case 'O': case 'P':
432 case 'Q': case 'R': case 'S': case 'T':
433 case 'U': case 'V': case 'W': case 'X':
436 /* Start of a token. Parse. */
/* The collected text is classified by check_token(). */
446 return check_token(line);
/* Digits start a numeric constant. */
448 case '2': case '3': case '4': case '5':
449 case '6': case '7': case '8': case '9':
/* An x in either case selects hexadecimal. */
453 if (c == 'x' || c == 'X') {
/* The U and L suffix test appears twice in a row. */
461 if (c == 'U' || c == 'u' || c == 'L' || c == 'l') {
465 if (c == 'U' || c == 'u' || c == 'L' || c == 'l') {
/* constVal() converts the collected text into the value. */
471 yylval.val = constVal(line);
/* String literal: strVal() converts the text in p. */
476 yylval.val = strVal(p);
477 return(STRING_LITERAL);
480 ['\n', '\\', '\'', '\"'...]
496 error("Unrecognised character constant %s", line);
/* charVal() converts the collected character constant text. */
498 yylval.val = charVal(line);
512 /* Cases which can be compounds */
513 /* The types and classes of composites are:
515 += -= *= /= %= &= ^= |=
520 So a composite started by char 'x' can be:
521 1. Followed by itself then an equals
522 2. Followed by itself
523 3. Followed by an equals
535 yylval.yyint = RIGHT_ASSIGN;
538 yylval.yyint = LEFT_ASSIGN;
541 error("Unrecognised token %c%c=", c, c);
545 /* Push the next char back on and find the class */
564 error("Unrecognised token %c%c", c, c);
/* Operator followed by an equals sign. */
569 else if (next == '=') {
573 result = ADD_ASSIGN; break;
575 result = SUB_ASSIGN; break;
577 result = MUL_ASSIGN; break;
579 result = DIV_ASSIGN; break;
581 result = MOD_ASSIGN; break;
583 result = AND_ASSIGN; break;
585 result = XOR_ASSIGN; break;
587 result = OR_ASSIGN; break;
589 result = LE_OP; break;
591 result = GE_OP; break;
593 result = NE_OP; break;
595 error("Unrecognised token %c=", c);
/* The selected token code is also stored in yylval.yyint. */
598 yylval.yyint = result;
/* The arrow operator is tested for separately. */
603 else if (c == '-' && next == '>') {
637 /* Special characters that cannot be part of a composite */
640 error("Unhandled character %c", c);
/* ENTRY(tok) expands to a complete case label that prints the name of
   tok and then breaks out of the switch. It is used below to name the
   token being returned.
   NOTE(review): the header of the function enclosing the statements
   below is not visible here. */
645 #define ENTRY(_a) case (_a): printf(#_a); break;
/* Buffer offset used as the start of the text printed below. */
651 static int lastpos = 0;
654 printf("Returning ");
660 ENTRY(STRING_LITERAL);
744 ENTRY(GET_VALUE_AT_ADDRESS);
/* Tokens without an ENTRY line are printed as a single character. */
766 printf("default: %c", ret);
/* linebuf is terminated at linepos just for the printf call, then the
   overwritten character is put back.
   NOTE(review): the byte count is an int difference printed with a
   format meant for unsigned. */
768 tmp = linebuf[linepos];
769 linebuf[linepos] = '\0';
770 printf(" for %s (%u bytes)\n", linebuf + lastpos, linepos - lastpos);
771 linebuf[linepos] = tmp;
/* Evaluate the test expression _a and print a failure line naming the
   expression when it is false. The expansion is a single parenthesised
   expression of type void with no trailing semicolon. TEST(x); therefore
   behaves as exactly one statement (safe inside an unbraced if/else),
   the macro can be used inside a larger expression, and both arms of the
   conditional have the same type. */
#define TEST(_a) ((_a) ? (void)0 : (void)printf("Test %s failed\n", #_a))
/* Test helper: load input into the read-ahead buffer so that the lexer
   reads it in place of the real input stream.
   NOTE(review): the rest of the body, including the return value that
   callers compare against token codes, is not visible here. */
780 int altlex_testparse(const char *input)
782 /* Fiddle with the read-ahead buffer to insert ourselves */
/* NOTE(review): strcpy() is not bounded by sizeof(linebuf). */
783 strcpy(linebuf, input);
/* The length counts the terminating NUL as well, so the lexer reads a
   zero character after the text. */
784 linelen = strlen(linebuf)+1;
/* Test helper: lex input as a character constant and return its value.
   The token must be a CONSTANT whose type is a SPECIFIER with noun
   V_CHAR and storage class S_LITERAL; the value is then read from the
   v_int member.
   NOTE(review): where val comes from and what is returned when a check
   fails are on lines not visible here. */
790 int altlex_testchar(const char *input)
793 if (altlex_testparse(input) != CONSTANT)
796 if (val->type->class != SPECIFIER)
798 if (SPEC_NOUN(val->type) != V_CHAR)
800 if (SPEC_SCLS(val->type) != S_LITERAL)
802 return SPEC_CVAL(val->type).v_int;
/* Test helper: lex input as an integer constant and return its value.
   The token must be a CONSTANT whose type is a SPECIFIER with noun V_INT
   and storage class S_LITERAL.
   NOTE(review): where val comes from and what is returned when a check
   fails are on lines not visible here. */
805 int altlex_testnum(const char *input)
808 if (altlex_testparse(input) != CONSTANT)
811 if (val->type->class != SPECIFIER)
813 if (SPEC_NOUN(val->type) != V_INT)
815 if (SPEC_SCLS(val->type) != S_LITERAL)
/* Unsigned constants are read from v_uint, all others from v_int. */
817 if (SPEC_USIGN(val->type))
818 return SPEC_CVAL(val->type).v_uint;
820 return SPEC_CVAL(val->type).v_int;
823 int altlex_runtests(void)
825 /* These conditions are ripped directly from SDCC.lex */
826 /* First check the parsing of the basic tokens */
827 TEST(altlex_testparse(">>=") == RIGHT_ASSIGN);
828 TEST(altlex_testparse("<<=") == LEFT_ASSIGN);
829 TEST(altlex_testparse("+=") == ADD_ASSIGN);
830 TEST(altlex_testparse("-=") == SUB_ASSIGN);
831 TEST(altlex_testparse("*=") == MUL_ASSIGN);
832 TEST(altlex_testparse("/=") == DIV_ASSIGN);
833 TEST(altlex_testparse("%=") == MOD_ASSIGN);
834 TEST(altlex_testparse("&=") == AND_ASSIGN);
835 TEST(altlex_testparse("^=") == XOR_ASSIGN);
836 TEST(altlex_testparse("|=") == OR_ASSIGN);
837 TEST(altlex_testparse(">>") == RIGHT_OP);
838 TEST(altlex_testparse("<<") == LEFT_OP);
839 TEST(altlex_testparse("++") == INC_OP);
840 TEST(altlex_testparse("--") == DEC_OP);
841 TEST(altlex_testparse("->") == PTR_OP);
842 TEST(altlex_testparse("&&") == AND_OP);
843 TEST(altlex_testparse("||") == OR_OP);
844 TEST(altlex_testparse("<=") == LE_OP);
845 TEST(altlex_testparse(">=") == GE_OP);
846 TEST(altlex_testparse("==") == EQ_OP);
847 TEST(altlex_testparse("!=") == NE_OP);
848 TEST(altlex_testparse(";") == ';');
849 TEST(altlex_testparse("{") == '{');
850 TEST(altlex_testparse("}") == '}');
851 TEST(altlex_testparse(",") == ',');
852 TEST(altlex_testparse(":") == ':');
853 TEST(altlex_testparse("=") == '=');
854 TEST(altlex_testparse("(") == '(');
855 TEST(altlex_testparse(")") == ')');
856 TEST(altlex_testparse("[") == '[');
857 TEST(altlex_testparse("]") == ']');
858 TEST(altlex_testparse(".") == '.');
859 TEST(altlex_testparse("&") == '&');
860 TEST(altlex_testparse("!") == '!');
861 TEST(altlex_testparse("~") == '~');
862 TEST(altlex_testparse("-") == '-');
863 TEST(altlex_testparse("+") == '+');
864 TEST(altlex_testparse("*") == '*');
865 TEST(altlex_testparse("/") == '/');
866 TEST(altlex_testparse("%") == '%');
867 TEST(altlex_testparse("<") == '<');
868 TEST(altlex_testparse(">") == '>');
869 TEST(altlex_testparse("^") == '^');
870 TEST(altlex_testparse("|") == '|');
871 TEST(altlex_testparse("?") == '?');
873 /* Now some character constants */
874 TEST(altlex_testchar("'1'") == '1');
875 TEST(altlex_testchar("'a'") == 'a');
876 TEST(altlex_testchar("'A'") == 'A');
877 TEST(altlex_testchar("'z'") == 'z');
878 TEST(altlex_testchar("'Z'") == 'Z');
879 TEST(altlex_testchar("'\n'") == '\n');
880 TEST(altlex_testchar("'\\\\'") == '\\');
881 TEST(altlex_testchar("'\\''") == '\'');
883 /* And some numbers */
884 TEST(altlex_testnum("0") == 0);
885 TEST(altlex_testnum("1") == 1);
886 TEST(altlex_testnum("075") == 075);
887 TEST(altlex_testnum("0xfeed") == 0xfeed);
888 TEST(altlex_testnum("0xFEED") == 0xFEED);
889 TEST(altlex_testnum("0x00005678") == 0x5678);
892 TEST(altlex_testparse("auto") == AUTO);
893 TEST(altlex_testparse("break") == BREAK);
894 TEST(altlex_testparse("case") == CASE);
895 TEST(altlex_testparse("char") == CHAR);
896 TEST(altlex_testparse("const") == CONST);
897 TEST(altlex_testparse("continue") == CONTINUE);
898 TEST(altlex_testparse("default") == DEFAULT);
899 TEST(altlex_testparse("do") == DO);
900 /* Prints a warning */
901 // TEST(altlex_testparse("double") == FLOAT);
902 TEST(altlex_testparse("else") == ELSE);
903 TEST(altlex_testparse("enum") == ENUM);
904 TEST(altlex_testparse("extern") == EXTERN);
905 TEST(altlex_testparse("float") == FLOAT);
906 TEST(altlex_testparse("for") == FOR);
907 TEST(altlex_testparse("goto") == GOTO);
908 TEST(altlex_testparse("if") == IF);
909 TEST(altlex_testparse("int") == INT);
910 TEST(altlex_testparse("interrupt") == INTERRUPT);
911 TEST(altlex_testparse("long") == LONG);
912 TEST(altlex_testparse("register") == REGISTER);
913 TEST(altlex_testparse("return") == RETURN);
914 TEST(altlex_testparse("short") == SHORT);
915 TEST(altlex_testparse("signed") == SIGNED);
916 TEST(altlex_testparse("sizeof") == SIZEOF);
917 TEST(altlex_testparse("static") == STATIC);
918 TEST(altlex_testparse("struct") == STRUCT);
919 TEST(altlex_testparse("switch") == SWITCH);
920 TEST(altlex_testparse("typedef") == TYPEDEF);
921 TEST(altlex_testparse("union") == UNION);
922 TEST(altlex_testparse("unsigned") == UNSIGNED);
923 TEST(altlex_testparse("void") == VOID);
924 TEST(altlex_testparse("volatile") == VOLATILE);
925 TEST(altlex_testparse("while") == WHILE);
926 TEST(altlex_testparse("...") == VAR_ARGS);
929 /* Platform specific keywords */
930 TEST(altlex_testparse("sram") ==) { count(); TKEYWORD(XDATA);}
931 TEST(altlex_testparse("using") ==) { count(); TKEYWORD(USING); }
932 TEST(altlex_testparse("near") ==) { count(); TKEYWORD(DATA);}
933 TEST(altlex_testparse("at") ==) { count(); TKEYWORD(AT) ; }
934 TEST(altlex_testparse("bit") ==) { count(); TKEYWORD(BIT) ; }
935 TEST(altlex_testparse("code") ==) { count(); TKEYWORD(CODE); }
936 TEST(altlex_testparse("critical") ==) { count(); TKEYWORD(CRITICAL); }
937 TEST(altlex_testparse("data") ==) { count(); TKEYWORD(DATA); }
938 TEST(altlex_testparse("far") ==) { count(); TKEYWORD(XDATA); }
939 TEST(altlex_testparse("eeprom") ==) { count(); TKEYWORD(EEPROM); }
940 TEST(altlex_testparse("flash") ==) { count(); TKEYWORD(CODE);}
941 TEST(altlex_testparse("idata") ==) { count(); TKEYWORD(IDATA);}
942 TEST(altlex_testparse("nonbanked") ==) { count(); TKEYWORD(NONBANKED);}
943 TEST(altlex_testparse("banked") ==) { count(); TKEYWORD(BANKED);}
944 TEST(altlex_testparse("pdata") ==) { count(); TKEYWORD(PDATA); }
945 TEST(altlex_testparse("reentrant") ==) { count(); TKEYWORD(REENTRANT);}
946 TEST(altlex_testparse("sfr") ==) { count(); TKEYWORD(SFR) ; }
947 TEST(altlex_testparse("sbit") ==) { count(); TKEYWORD(SBIT) ; }
948 TEST(altlex_testparse("xdata") ==) { count(); TKEYWORD(XDATA); }
949 TEST(altlex_testparse("_data") ==) { count(); TKEYWORD(_NEAR); }
950 TEST(altlex_testparse("_code") ==) { count(); TKEYWORD(_CODE); }
951 TEST(altlex_testparse("_eeprom") ==) { count(); TKEYWORD(_EEPROM); }
952 TEST(altlex_testparse("_flash") ==) { count(); TKEYWORD(_CODE); }
953 TEST(altlex_testparse("_generic") ==) { count(); TKEYWORD(_GENERIC); }
954 TEST(altlex_testparse("_near") ==) { count(); TKEYWORD(_NEAR); }
955 TEST(altlex_testparse("_sram") ==) { count(); TKEYWORD(_XDATA);}
956 TEST(altlex_testparse("_xdata") ==) { count(); TKEYWORD(_XDATA);}
957 TEST(altlex_testparse("_pdata") ==) { count(); TKEYWORD(_PDATA); }
958 TEST(altlex_testparse("_idata") ==) { count(); TKEYWORD(_IDATA); }