2 An alternate lexer to SDCC.lex.
3 In development - i.e. messy and just plain wrong.
15 /* Right. What are the parts of the C stream? From SDCC.lex:
17 L = [a..z A..Z _] Letters and _
18 H = [a..f A..F 0-9] Hex digits
19 E = [eE+-0-9] Digits in a float
20 FS = [fFlL] Specifiers for a float
21 IS = [uUlL] Specifiers for a int
23 L[LD]* A 'token' - can't think of a good name
24 Check tokens against the reserved words.
28 If in the typedef table, do stuff...
29 Blah. See check_type()
30 0[xX]{H}+ Hex number - PENDING: specifiers
31 0{D}+ Octal number - PENDING: specifiers
32 {D}+ Decimal - PENDING: specifiers
36 Comment start Strip until end of comment.
43 D Try to read a number
44 Punct Try to read punct
/*
 * Refills the lexer's input buffer: reads up to sizeof(linebuf) bytes from
 * yyin into linebuf, records the byte count in linelen, and returns the
 * character at linepos while advancing linepos.
 * NOTE(review): whatever happens between the fread() and the return (end of
 * input handling, resetting linepos) is not visible in this excerpt - confirm.
 */
52 static int underflow(void)
54 linelen = fread(linebuf, 1, sizeof(linebuf), yyin);
58 return linebuf[linepos++];
/*
 * Fetches the next input character.  While unread data remains in linebuf
 * (linepos < linelen) the character is served straight from the buffer and
 * linepos is advanced.
 * NOTE(review): the buffer-exhausted path is not visible here; presumably it
 * falls back to underflow() - confirm.
 */
61 static int INLINE ygetc(void)
63 if (linepos < linelen)
64 return linebuf[linepos++];
/*
 * Pushes character c back onto the input: linepos is stepped back one slot
 * and c is stored there.
 * NOTE(review): no guard against linepos == 0 is visible in this excerpt, and
 * the value returned to the caller is not shown - confirm both.
 */
69 static int INLINE yungetc(int c)
71 linebuf[--linepos] = c;
/*
 * Character-level input hooks for the lexer.  GETC/UNGETC go through the
 * ygetc()/yungetc() helpers, which serve characters from linebuf; the
 * commented-out pair below is the plain stdio alternative that talks to
 * yyin directly.
 */
#define GETC()          ygetc()
#define UNGETC(_a)      yungetc(_a)

//#define GETC() fgetc(yyin);
//#define UNGETC(_a) ungetc(_a, yyin)

/*
 * Character classification.
 *
 * ISL parenthesises its argument in the comparison so that an expression
 * argument (e.g. `c | 0`) is tested as a whole instead of being regrouped by
 * operator precedence.  It still evaluates the argument twice, so never pass
 * an expression with side effects such as GETC().
 */
#define ISL(_a)         (isalnum(_a) || (_a) == '_')
#define ISALNUM(_a)     isalnum(_a)
/*
 * NOTE(review): ISHEX accepts any alphanumeric, not just [0-9a-fA-F].  It is
 * left unchanged because the number-lexing code may depend on it to pick up
 * trailing type specifiers - confirm before tightening it to isxdigit().
 */
#define ISHEX(_a)       isalnum(_a)
/*
 * Reads the text of a string literal from the input using GETC().
 *
 * Characters are stored through the write pointer `str` (presumably walking
 * the function-local static buffer `line`, 1000 bytes - the initialisation
 * is not visible in this excerpt).  Because the buffer is static, any
 * pointer into it is overwritten by the next call.
 *
 * Visible rules:
 *   - a zero character ends the scan (treated as end of input);
 *   - a backslash is stored together with the character that follows it,
 *     without interpreting the escape;
 *   - a newline ends the scan;
 *   - after a closing quote, whitespace is skipped to look for an adjacent
 *     literal to concatenate.
 *
 * NOTE(review): no bounds check against the size of `line` is visible on the
 * `*str++` writes, and end of input is tested as `!ch` rather than EOF -
 * confirm both against the full function.
 */
84 char *stringLiteral (void)
86 static char line[1000];
91 /* put into the buffer till we hit the */
96 if (!ch) break ; /* end of input */
97 /* if it is a \ then everything allowed */
99 *str++ = ch ; /* backslash in place */
100 *str++ = GETC() ; /* following char in place */
101 continue ; /* carry on */
104 /* if new line we have a new line break */
105 if (ch == '\n') break ;
107 /* if this is a quote then we have work to do */
108 /* find the next non whitespace character */
109 /* if that is a double quote then carry on */
/* skip whitespace; the loop also stops when GETC() yields 0 */
112 while ((ch = GETC()) && isspace(ch)) ;
/*
 * Skips over a comment.  `type` selects the comment style; the visible call
 * site passes the character that followed the opening '/', i.e. '*' or '/'.
 * For type '/', the loop runs until c is a newline or EOF.
 * NOTE(review): the branch that precedes the `else if` and the loop body are
 * not visible in this excerpt - confirm.
 */
128 void discard_comments(int type)
143 else if (type == '/') {
144 while (c != '\n' && c != EOF) {
/*
 * Expands to a bare `return _a` (the caller supplies the semicolon); used in
 * check_token() to hand back the token code of a matched keyword.
 */
153 #define TKEYWORD(_a) return _a
/*
 * Classifies the identifier-like string sz.
 *
 * sz is compared with strcmp() against each reserved word in turn.  The
 * branches visible here return the keyword's token code through TKEYWORD();
 * "double" is special: it raises W_DOUBLE_UNSUPPORTED via werror() and is
 * returned as FLOAT.
 *
 * If no keyword matched, sz is looked up in TypedefTab with findSym().  In
 * both the typedef case and the fall-through case the text is copied into
 * yylval.yychar.
 *
 * NOTE(review): the token codes returned after the two strcpy() calls, and
 * the bodies of most keyword branches, are not visible in this excerpt.
 * Neither strcpy() shows a length check against yylval.yychar - confirm.
 */
155 int check_token(const char *sz)
157 if (!strcmp(sz, "at")) {
160 else if (!strcmp(sz, "auto")) {
163 else if (!strcmp(sz, "bit")) {
166 else if (!strcmp(sz, "break")) {
169 else if (!strcmp(sz, "case")) {
172 else if (!strcmp(sz, "char")) {
175 else if (!strcmp(sz, "code")) {
178 else if (!strcmp(sz, "const")) {
181 else if (!strcmp(sz, "continue")) {
184 else if (!strcmp(sz, "critical")) {
185 TKEYWORD(CRITICAL); }
187 else if (!strcmp(sz, "data")) {
190 else if (!strcmp(sz, "default")) {
193 else if (!strcmp(sz, "do")) {
196 else if (!strcmp(sz, "double")) {
/* double is not supported: warn and return the FLOAT token instead */
197 werror(W_DOUBLE_UNSUPPORTED);return(FLOAT); }
199 else if (!strcmp(sz, "else")) {
202 else if (!strcmp(sz, "enum")) {
205 else if (!strcmp(sz, "extern")) {
208 else if (!strcmp(sz, "far")) {
211 else if (!strcmp(sz, "eeprom")) {
214 else if (!strcmp(sz, "float")) {
217 else if (!strcmp(sz, "flash")) {
220 else if (!strcmp(sz, "for")) {
223 else if (!strcmp(sz, "goto")) {
226 else if (!strcmp(sz, "idata")) {
229 else if (!strcmp(sz, "if")) {
232 else if (!strcmp(sz, "int")) {
235 else if (!strcmp(sz, "interrupt")) {
238 else if (!strcmp(sz, "nonbanked")) {
239 TKEYWORD(NONBANKED);}
241 else if (!strcmp(sz, "banked")) {
244 else if (!strcmp(sz, "long")) {
247 else if (!strcmp(sz, "near")) {
250 else if (!strcmp(sz, "pdata")) {
253 else if (!strcmp(sz, "reentrant")) {
254 TKEYWORD(REENTRANT);}
256 else if (!strcmp(sz, "register")) {
259 else if (!strcmp(sz, "return")) {
262 else if (!strcmp(sz, "sfr")) {
265 else if (!strcmp(sz, "sbit")) {
268 else if (!strcmp(sz, "short")) {
271 else if (!strcmp(sz, "signed")) {
274 else if (!strcmp(sz, "sizeof")) {
277 else if (!strcmp(sz, "sram")) {
280 else if (!strcmp(sz, "static")) {
283 else if (!strcmp(sz, "struct")) {
286 else if (!strcmp(sz, "switch")) {
289 else if (!strcmp(sz, "typedef")) {
292 else if (!strcmp(sz, "union")) {
295 else if (!strcmp(sz, "unsigned")) {
298 else if (!strcmp(sz, "void")) {
301 else if (!strcmp(sz, "volatile")) {
304 else if (!strcmp(sz, "using")) {
307 else if (!strcmp(sz, "while")) {
310 else if (!strcmp(sz, "xdata")) {
313 else if (!strcmp(sz, "_data")) {
316 else if (!strcmp(sz, "_code")) {
319 else if (!strcmp(sz, "_eeprom")) {
322 else if (!strcmp(sz, "_flash")) {
325 else if (!strcmp(sz, "_generic")) {
326 TKEYWORD(_GENERIC); }
328 else if (!strcmp(sz, "_near")) {
331 else if (!strcmp(sz, "_sram")) {
334 else if (!strcmp(sz, "_xdata")) {
337 else if (!strcmp(sz, "_pdata")) {
340 else if (!strcmp(sz, "_idata")) {
343 /* check if it is in the typedef table */
344 if (findSym(TypedefTab,NULL,sz)) {
/* typedef name: pass its spelling to the parser through yylval */
345 strcpy(yylval.yychar,sz);
/* not a keyword or typedef: the spelling is passed the same way */
349 strcpy (yylval.yychar,sz);
362 /* Handle comments first */
365 if (c2 == '*' || c2 == '/') {
366 discard_comments(c2);
380 /* Skip whitespace */
382 case 'a': case 'b': case 'c': case 'd':
383 case 'e': case 'f': case 'g': case 'h':
384 case 'i': case 'j': case 'k': case 'l':
385 case 'm': case 'n': case 'o': case 'p':
386 case 'q': case 'r': case 's': case 't':
387 case 'u': case 'v': case 'w': case 'x':
389 case 'A': case 'B': case 'C': case 'D':
390 case 'E': case 'F': case 'G': case 'H':
391 case 'I': case 'J': case 'K': case 'L':
392 case 'M': case 'N': case 'O': case 'P':
393 case 'Q': case 'R': case 'S': case 'T':
394 case 'U': case 'V': case 'W': case 'X':
397 /* Start of a token. Parse. */
407 return check_token(line);
409 case '2': case '3': case '4': case '5':
410 case '6': case '7': case '8': case '9':
414 if (c == 'x' || c == 'X') {
424 yylval.val = constVal(line);
429 yylval.val = strVal(p);
430 return(STRING_LITERAL);
435 /* Assume a pragma and toast the rest of the line. */
453 /* Cases which can be compounds */
477 /* Special characters that can't be part of a composite */
480 printf("Unhandled char %c\n", c);