1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
43 /* MULTIBYTE_CHARS support only works for native compilers.
44 ??? Ideally what we want is to model widechar support after
45 the current floating point support. */
47 #undef MULTIBYTE_CHARS
50 #ifdef MULTIBYTE_CHARS
55 /* Tokens with SPELL_STRING store their spelling in the token list,
56 and its length in token->val.str.len. */
/* Token spelling data.  Each token_spellings entry has a spelling
   category and a name.  The OP() and TK() macros below build one
   entry per token type while TTYPE_TABLE is expanded: OP entries are
   always SPELL_OPERATOR with the operator text as name, TK entries
   take their category from the table's second argument and use
   STRINGX (e) as name.  TOKEN_SPELL() and TOKEN_NAME() look an entry
   up by token->type.  */
68 enum spell_type category;
69 const unsigned char *name;
72 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
75 #define OP(e, s) { SPELL_OPERATOR, U s },
76 #define TK(e, s) { s, U STRINGX (e) },
77 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
81 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
82 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of the file-local (static) helpers used by the
   lexer.  Parameter lists are wrapped in PARAMS (()), with the single
   exception of copy_text_chars, which is declared with a plain
   prototype.  */
84 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
86 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
88 static int skip_asm_block PARAMS ((cpp_reader *, int));
89 static int skip_block_comment PARAMS ((cpp_reader *));
90 static int skip_line_comment PARAMS ((cpp_reader *));
91 static void adjust_column PARAMS ((cpp_reader *));
92 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
93 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static unsigned int copy_text_chars (char *, const char *, unsigned int, int);
100 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *, int));
101 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
102 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
103 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
104 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
105 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
106 const unsigned char *, unsigned int *));
108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
110 static unsigned int hex_digit_value PARAMS ((unsigned int));
114 Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
/* cpp_ideq: the token's type is tested against CPP_NAME first (the
   statement controlled by that test is not visible in this view).
   The comparison itself returns the logical negation of ustrcmp
   applied to the node's name and STRING, the latter cast to
   const U_CHAR *.  */
118 cpp_ideq (token, string)
119 const cpp_token *token;
122 if (token->type != CPP_NAME)
125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
128 /* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
/* handle_newline: BUFFER's column bookkeeping is restarted for the new
   line (col_adjust cleared, line_base moved to the current read
   position), then the character after the newline is fetched if one
   is available; NEXT is initialised to EOF for the case where none
   is.  If the fetched character completes a two-character CR-LF or
   LF-CR pair, line_base is moved again and one more character is read
   when available.  The character found is also stored in
   buffer->read_ahead.  */
131 handle_newline (buffer, newline_char)
133 cppchar_t newline_char;
135 cppchar_t next = EOF;
137 buffer->col_adjust = 0;
139 buffer->line_base = buffer->cur;
141 /* Handle CR-LF and LF-CR combinations, get the next character. */
142 if (buffer->cur < buffer->rlimit)
144 next = *buffer->cur++;
/* Assuming NEWLINE_CHAR is itself '\n' or '\r', the two sums are equal
   only when NEXT is the other member of the pair.  */
145 if (next + newline_char == '\r' + '\n')
147 buffer->line_base = buffer->cur;
148 if (buffer->cur < buffer->rlimit)
149 next = *buffer->cur++;
155 buffer->read_ahead = next;
159 /* Subroutine of skip_escaped_newlines; called when a trigraph is
160 encountered. It warns if necessary, and returns true if the
161 trigraph should be honoured. FROM_CHAR is the third character of a
162 trigraph, and presumed to be the previous character for position
/* trigraph_ok: ACCEPT is read from the trigraphs option.  When
   warn_trigraphs is enabled and a comment is not being lexed, one of
   two warnings is issued, located two columns before the current
   buffer column: the "converted" form, which shows the replacement
   taken from _cpp_trigraph_map, or the "ignored" form.  The "ignored"
   form is only issued when buffer->cur differs from
   buffer->last_Wtrigraphs, which is updated at the same time, so the
   same position is not reported twice.  NOTE(review): the test that
   chooses between the two warnings and the return statement are not
   visible in this view; presumably both use ACCEPT.  */
165 trigraph_ok (pfile, from_char)
169 int accept = CPP_OPTION (pfile, trigraphs);
171 /* Don't warn about trigraphs in comments. */
172 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
174 cpp_buffer *buffer = pfile->buffer;
176 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
177 "trigraph ??%c converted to %c",
179 (int) _cpp_trigraph_map[from_char]);
180 else if (buffer->cur != buffer->last_Wtrigraphs)
182 buffer->last_Wtrigraphs = buffer->cur;
183 cpp_warning_with_line (pfile, buffer->lineno,
184 CPP_BUF_COL (buffer) - 2,
185 "trigraph ??%c ignored", (int) from_char);
192 /* Assumes local variables buffer and result. */
/* ACCEPT_CHAR (T) records T as the type of the token being built and
   sets buffer->read_ahead to EOF, so that no look-ahead character
   remains pending.  */
193 #define ACCEPT_CHAR(t) \
194 do { result->type = t; buffer->read_ahead = EOF; } while (0)
196 /* When we move to multibyte character sets, add to these something
197 that saves and restores the state of the multibyte conversion
198 library. This probably involves saving and restoring a "cookie".
199 In the case of glibc it is an 8-byte structure, so is not a high
200 overhead operation. In any case, it's out of the fast path. */
/* Both macros assume locals named buffer and saved_cur: SAVE_STATE
   stashes the current read pointer in saved_cur and RESTORE_STATE
   writes it back.  */
201 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
202 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
204 /* Skips any escaped newlines introduced by NEXT, which is either a
205 '?' or a '\\'. Returns the next character, which will also have
206 been placed in buffer->read_ahead. This routine performs
207 preprocessing stages 1 and 2 of the ISO C standard. */
/* skip_escaped_newlines: nothing is processed when the buffer is
   marked from_stage3.  Otherwise the routine loops for as long as the
   current character is a backslash or '?'.  A '?' is followed through
   a three-character trigraph: the second character must be '?', the
   third must have an entry in _cpp_trigraph_map and be approved by
   trigraph_ok, after which NEXT becomes the mapped character.  For a
   backslash, following non-vertical whitespace is scanned over; if a
   vertical-space character comes next, the newline is consumed
   through handle_newline, with a warning when whitespace separated
   the two and a comment is not being lexed.  A pedwarn exists for a
   backslash-newline at end of file.  The resulting character is
   stored in buffer->read_ahead.  NOTE(review): the back-out paths that
   restore the saved read pointer are not visible in this view.  */
209 skip_escaped_newlines (buffer, next)
213 /* Only do this if we apply stages 1 and 2. */
214 if (!buffer->from_stage3)
217 const unsigned char *saved_cur;
222 if (buffer->cur == buffer->rlimit)
228 next1 = *buffer->cur++;
229 if (next1 != '?' || buffer->cur == buffer->rlimit)
235 next1 = *buffer->cur++;
236 if (!_cpp_trigraph_map[next1]
237 || !trigraph_ok (buffer->pfile, next1))
243 /* We have a full trigraph here. */
244 next = _cpp_trigraph_map[next1];
245 if (next != '\\' || buffer->cur == buffer->rlimit)
250 /* We have a backslash, and room for at least one more character. */
254 next1 = *buffer->cur++;
255 if (!is_nvspace (next1))
259 while (buffer->cur < buffer->rlimit);
261 if (!is_vspace (next1))
267 if (space && !buffer->pfile->state.lexing_comment)
268 cpp_warning (buffer->pfile,
269 "backslash and newline separated by space");
271 next = handle_newline (buffer, next1);
273 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
275 while (next == '\\' || next == '?');
278 buffer->read_ahead = next;
282 /* Obtain the next character, after trigraph conversion and skipping
283 an arbitrary string of escaped newlines. The common case of no
284 trigraphs or escaped newlines falls through quickly. */
/* get_effective_char: reads one character if any remain in BUFFER
   (NEXT is EOF otherwise), hands '?' and backslash to
   skip_escaped_newlines, and records the result in
   buffer->read_ahead.  */
286 get_effective_char (buffer)
289 cppchar_t next = EOF;
291 if (buffer->cur < buffer->rlimit)
293 next = *buffer->cur++;
295 /* '?' can introduce trigraphs (and therefore backslash); '\\'
296 can introduce escaped newlines, which we want to skip, or
297 UCNs, which, depending upon lexer state, we will handle in
299 if (next == '?' || next == '\\')
300 next = skip_escaped_newlines (buffer, next);
303 buffer->read_ahead = next;
307 /* SDCC _asm specific */
308 /* Skip an _asm ... _endasm block. We find the end of the block by
309 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
/* skip_asm_block: scans forward with state.lexing_comment set, looping
   while characters remain in the buffer.  READ_AHEAD is the caller's
   pending look-ahead character, or EOF if there is none.  The
   terminator is recognised as an underscore that follows whitespace
   and is itself followed by the text "endasm"; the comparison against
   rlimit keeps the strncmp inside the buffer, and buffer->cur is then
   advanced past the matched text.  Vertical space goes through
   handle_newline.  On exit lexing_comment is cleared and read_ahead is
   reset to EOF.  NOTE(review): the statements that leave the loop and
   produce the return value are not visible in this view.  */
312 skip_asm_block (pfile, read_ahead)
316 #define _ENDASM_STR "endasm"
317 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
319 cpp_buffer *buffer = pfile->buffer;
324 pfile->state.lexing_comment = 1;
325 while (buffer->cur != buffer->rlimit)
327 if (read_ahead != EOF)
330 c = buffer->read_ahead;
335 prev_space = is_space(c);
340 /* FIXME: For speed, create a new character class of characters
341 of interest inside block comments. */
342 if (c == '?' || c == '\\')
343 c = skip_escaped_newlines (buffer, c);
345 if (prev_space && c == '_')
347 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
348 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
350 buffer->cur += _ENDASM_LEN;
355 else if (is_vspace (c))
357 prev_space = is_space(c), c = handle_newline (buffer, c);
361 adjust_column (pfile);
364 pfile->state.lexing_comment = 0;
365 buffer->read_ahead = EOF;
369 /* Skip a C-style block comment. We find the end of the comment by
370 seeing if an asterisk is before every '/' we encounter. Returns
371 non-zero if comment terminated by EOF, zero otherwise. */
/* skip_block_comment: C and PREVC hold the current and the previous
   character; both start as EOF.  The scan runs with
   state.lexing_comment set and loops while characters remain in the
   buffer.  The final test returns zero only when the last two
   characters seen were '*' followed by '/', that is, a proper
   terminator; any other ending yields non-zero.  On exit
   lexing_comment is cleared and read_ahead is reset to EOF.  */
373 skip_block_comment (pfile)
376 cpp_buffer *buffer = pfile->buffer;
377 cppchar_t c = EOF, prevc = EOF;
379 pfile->state.lexing_comment = 1;
380 while (buffer->cur != buffer->rlimit)
382 prevc = c, c = *buffer->cur++;
385 /* FIXME: For speed, create a new character class of characters
386 of interest inside block comments. */
387 if (c == '?' || c == '\\')
388 c = skip_escaped_newlines (buffer, c);
390 /* People like decorating comments with '*', so check for '/'
391 instead for efficiency. */
397 /* Warn about potential nested comments, but not if the '/'
398 comes immediately before the true comment delimiter.
399 Don't bother to get it right across escaped newlines. */
400 if (CPP_OPTION (pfile, warn_comments)
401 && buffer->cur != buffer->rlimit)
403 prevc = c, c = *buffer->cur++;
404 if (c == '*' && buffer->cur != buffer->rlimit)
406 prevc = c, c = *buffer->cur++;
408 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
409 CPP_BUF_COL (buffer),
410 "\"/*\" within comment");
415 else if (is_vspace (c))
417 prevc = c, c = handle_newline (buffer, c);
421 adjust_column (pfile);
424 pfile->state.lexing_comment = 0;
425 buffer->read_ahead = EOF;
426 return c != '/' || prevc != '*';
429 /* Skip a C++ line comment. Handles escaped newlines. Returns
430 non-zero if a multiline comment. The following new line, if any,
431 is left in buffer->read_ahead. */
/* skip_line_comment: the starting line number is remembered so that
   the return value can report whether buffer->lineno changed while
   skipping (presumably through escaped newlines).  Characters are
   consumed, with state.lexing_comment set, until a vertical-space
   character is met; the visible end-of-buffer test suggests the loop
   also stops when input runs out.  The last character obtained is
   left in read_ahead rather than being processed as a line end.  */
433 skip_line_comment (pfile)
436 cpp_buffer *buffer = pfile->buffer;
437 unsigned int orig_lineno = buffer->lineno;
440 pfile->state.lexing_comment = 1;
444 if (buffer->cur == buffer->rlimit)
448 if (c == '?' || c == '\\')
449 c = skip_escaped_newlines (buffer, c);
451 while (!is_vspace (c));
453 pfile->state.lexing_comment = 0;
454 buffer->read_ahead = c; /* Leave any newline for caller. */
455 return orig_lineno != buffer->lineno;
458 /* pfile->buffer->cur is one beyond the \t character. Update
459 col_adjust so we track the column correctly. */
/* adjust_column: adds to buffer->col_adjust the value
   (tabstop - COL % tabstop) - 1, where COL is the buffer column minus
   one and tabstop is the option of that name.  For example, with a
   tabstop of 8 and COL equal to 0 the adjustment is 7.  */
461 adjust_column (pfile)
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
467 /* Round it up to multiple of the tabstop, but subtract 1 since the
468 tab itself occupies a character position. */
469 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
470 - col % CPP_OPTION (pfile, tabstop)) - 1;
473 /* Skips whitespace, saving the next non-whitespace character.
474 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
475 tokens might be assigned an incorrect column. */
/* skip_whitespace: C is the whitespace character the caller has
   already read.  The loop continues while is_nvspace holds for the
   current character.  The column adjustment is updated through
   adjust_column (presumably for tabs); a warning exists for NUL
   characters, which WARNED presumably limits to once per call; form
   feed and vertical tab draw a pedwarn when inside a directive and
   CPP_PEDANTIC holds.  The last character obtained is left in
   buffer->read_ahead.  */
477 skip_whitespace (pfile, c)
481 cpp_buffer *buffer = pfile->buffer;
482 unsigned int warned = 0;
486 /* Horizontal space always OK. */
490 adjust_column (pfile);
491 /* Just \f \v or \0 left. */
496 cpp_warning (pfile, "null character(s) ignored");
500 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
501 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
502 CPP_BUF_COL (buffer),
503 "%s in preprocessing directive",
504 c == '\f' ? "form feed" : "vertical tab");
507 if (buffer->cur == buffer->rlimit)
511 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
512 while (is_nvspace (c));
514 /* Remember the next character. */
515 buffer->read_ahead = c;
518 /* See if the characters of a number token are valid in a name (no
/* name_p: walks the string->len characters of STRING->text in order
   and tests each one with is_idchar.  NOTE(review): the statements
   that decide the return value are not visible in this view.  */
521 name_p (pfile, string)
523 const cpp_string *string;
527 for (i = 0; i < string->len; i++)
528 if (!is_idchar (string->text[i]))
534 /* Parse an identifier, skipping embedded backslash-newlines.
535 Calculate the hash value of the token while parsing, for improved
536 performance. The hashing algorithm *must* match cpp_lookup(). */
/* parse_identifier: C is the first character of the identifier.  The
   spelling is accumulated one character at a time on the hash table's
   obstack.  When a '?' or backslash interrupts the run of identifier
   characters, skip_escaped_newlines is consulted and scanning resumes
   if the character that follows is again an identifier character.
   The last character obtained is left in buffer->read_ahead.  The
   finished spelling is NUL-terminated and passed to ht_lookup with
   HT_ALLOCED; the caller stores the resulting node in the token.
   Diagnostics for flagged identifiers (poisoned names, __VA_ARGS__)
   are issued here unless pfile->skipping is set.
   NOTE(review): no hash computation is visible in this function; the
   remark about hashing in the comment above may predate the use of
   ht_lookup.  */
538 static cpp_hashnode *
539 parse_identifier (pfile, c)
543 cpp_hashnode *result;
544 cpp_buffer *buffer = pfile->buffer;
545 unsigned int saw_dollar = 0, len;
546 struct obstack *stack = &pfile->hash_table->stack;
552 obstack_1grow (stack, c);
558 if (buffer->cur == buffer->rlimit)
563 while (is_idchar (c));
565 /* Potential escaped newline? */
566 if (c != '?' && c != '\\')
568 c = skip_escaped_newlines (buffer, c);
570 while (is_idchar (c));
572 /* Remember the next character. */
573 buffer->read_ahead = c;
575 /* $ is not an identifier character in the standard, but is commonly
576 accepted as an extension. Don't warn about it in skipped
577 conditional blocks. */
578 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
579 cpp_pedwarn (pfile, "'$' character(s) in identifier");
581 /* Identifiers are null-terminated. */
582 len = obstack_object_size (stack);
583 obstack_1grow (stack, '\0');
585 /* This routine commits the memory if necessary. */
586 result = (cpp_hashnode *)
587 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
589 /* Some identifiers require diagnostics when lexed. */
590 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
592 /* It is allowed to poison the same identifier twice. */
593 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
594 cpp_error (pfile, "attempt to use poisoned \"%s\"",
597 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
598 replacement list of a variadic macro. */
599 if (result == pfile->spec_nodes.n__VA_ARGS__
600 && !pfile->state.va_args_ok)
601 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
607 /* Parse a number, skipping embedded backslash-newlines. */
/* parse_number: the spelling is written directly at the front of
   pfile->ident_pool, moving to another chunk via _cpp_next_chunk when
   the current one has no room left for a character plus the
   terminating NUL.  A character continues the number while it
   satisfies is_numchar, is a '.', or is accepted by VALID_SIGN
   together with the previously stored character.  '?' and backslash
   are passed to skip_escaped_newlines before deciding that the number
   has ended.  NUMBER receives the text pointer and the length; length
   plus one bytes are then committed in the pool.  The last character
   obtained is left in buffer->read_ahead.  LEADING_PERIOD is non-zero
   when a '.' is to be placed first; lex_dot passes 1 after seeing a
   period followed by a digit.  */
609 parse_number (pfile, number, c, leading_period)
615 cpp_buffer *buffer = pfile->buffer;
616 cpp_pool *pool = &pfile->ident_pool;
617 unsigned char *dest, *limit;
619 dest = POOL_FRONT (pool);
620 limit = POOL_LIMIT (pool);
622 /* Place a leading period. */
626 limit = _cpp_next_chunk (pool, 0, &dest);
634 /* Need room for terminating null. */
635 if (dest + 1 >= limit)
636 limit = _cpp_next_chunk (pool, 0, &dest);
640 if (buffer->cur == buffer->rlimit)
645 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
647 /* Potential escaped newline? */
648 if (c != '?' && c != '\\')
650 c = skip_escaped_newlines (buffer, c);
652 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
654 /* Remember the next character. */
655 buffer->read_ahead = c;
657 /* Null-terminate the number. */
660 number->text = POOL_FRONT (pool);
661 number->len = dest - number->text;
662 POOL_COMMIT (pool, number->len + 1);
665 /* Subroutine of parse_string. Emits error for unterminated strings. */
/* unterminated: reports the missing terminator TERM as an error.  For
   a double-quote terminator, when a multi-line string start position
   has been recorded and its line differs from the current lexer
   line, a second error points at the recorded position as the
   possible start of the unterminated literal, and the recorded line
   is then reset to zero.  */
667 unterminated (pfile, term)
671 cpp_error (pfile, "missing terminating %c character", term);
673 if (term == '\"' && pfile->mlstring_pos.line
674 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
676 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
677 pfile->mlstring_pos.col,
678 "possible start of unterminated string literal");
679 pfile->mlstring_pos.line = 0;
683 /* Subroutine of parse_string. */
/* unescaped_terminator_p: DEST points just past the characters stored
   so far, which begin at the front of pfile->ident_pool.  TEMP walks
   backwards from DEST over the run of backslashes that immediately
   precedes it, never going below START, so DEST - TEMP is the length
   of that run.  The function returns non-zero when that length is
   even.  The angled_headers test is an early special case whose
   controlled statement is not visible in this view.  */
685 unescaped_terminator_p (pfile, dest)
687 const unsigned char *dest;
689 const unsigned char *start, *temp;
691 /* In #include-style directives, terminators are not escapable. */
692 if (pfile->state.angled_headers)
695 start = POOL_FRONT (&pfile->ident_pool);
697 /* An odd number of consecutive backslashes represents an escaped
699 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
702 return ((dest - temp) & 1) == 0;
705 /* Parses a string, character constant, or angle-bracketed header file
706 name. Handles embedded trigraphs and escaped newlines. The stored
707 string is guaranteed NUL-terminated, but it is not guaranteed that
708 this is the first NUL since embedded NULs are preserved.
710 Multi-line strings are allowed, but they are deprecated. */
/* parse_string: TERMINATOR is the closing character to look for.
   Characters are stored at the front of pfile->ident_pool, switching
   chunks with _cpp_next_chunk when room for the terminating NUL runs
   out.  unterminated is called to report a literal that does not
   close.  A terminator character only ends the literal when
   unescaped_terminator_p agrees.  At a vertical-space character the
   outcome depends on context: the assembly-language case for
   terminators other than '>', the unterminated error for anything
   but a double-quoted string or when in angled-header mode, or
   continuation as a deprecated multi-line string whose start is
   remembered in pfile->mlstring_pos.  NULLS presumably counts embedded
   NUL characters for the warning about preserved nulls.  TOKEN
   receives the text pointer and length, and length plus one bytes are
   committed in the pool.  */
712 parse_string (pfile, token, terminator)
715 cppchar_t terminator;
717 cpp_buffer *buffer = pfile->buffer;
718 cpp_pool *pool = &pfile->ident_pool;
719 unsigned char *dest, *limit;
721 unsigned int nulls = 0;
723 dest = POOL_FRONT (pool);
724 limit = POOL_LIMIT (pool);
728 if (buffer->cur == buffer->rlimit)
734 /* We need space for the terminating NUL. */
736 limit = _cpp_next_chunk (pool, 0, &dest);
740 unterminated (pfile, terminator);
744 /* Handle trigraphs, escaped newlines etc. */
745 if (c == '?' || c == '\\')
746 c = skip_escaped_newlines (buffer, c);
748 if (c == terminator && unescaped_terminator_p (pfile, dest))
753 else if (is_vspace (c))
755 /* In assembly language, silently terminate string and
756 character literals at end of line. This is a kludge
757 around not knowing where comments are. */
758 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
761 /* Character constants and header names may not extend over
762 multiple lines. In Standard C, neither may strings.
763 Unfortunately, we accept multiline strings as an
764 extension, except in #include family directives. */
765 if (terminator != '"' || pfile->state.angled_headers)
767 unterminated (pfile, terminator);
771 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
772 if (pfile->mlstring_pos.line == 0)
773 pfile->mlstring_pos = pfile->lexer_pos;
775 c = handle_newline (buffer, c);
782 cpp_warning (pfile, "null character(s) preserved in literal");
788 /* Remember the next character. */
789 buffer->read_ahead = c;
792 token->val.str.text = POOL_FRONT (pool);
793 token->val.str.len = dest - token->val.str.text;
794 POOL_COMMIT (pool, token->val.str.len + 1);
797 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
798 comment blocks (when executed with -C option) and
799 _asm (SDCPP specific) blocks */
801 /* Count and copy characters from src to dest, excluding CRs:
802 CRs are automatically generated, because the output is
803 opened in TEXT mode. If dest == NULL, only count chars */
/* copy_text_chars: READ_AHEAD is handled first when it is neither EOF
   nor a carriage return, and is then written through DEST; after that
   the LEN characters starting at SRC are visited in order.
   NOTE(review): the loop body, the handling of a NULL DEST and the
   return statement are not visible in this view; the callers use the
   result as a character count.  */
805 copy_text_chars (dest, src, len, read_ahead)
814 if (read_ahead != EOF && read_ahead != '\r')
817 *dest++ = read_ahead;
821 for (p = src; p != src + len; ++p)
836 /* SDCC _asm specific */
837 /* The stored text includes the leading _asm keyword and any terminator. */
/* save_asm: FROM marks where the block text starts in the input and
   pfile->buffer->cur where it ends.  copy_text_chars is called twice:
   first with a NULL destination to size the text, then to copy it
   behind the literal "_asm" prefix into memory obtained from
   pfile->ident_pool.  TOKEN becomes a CPP_ASM token whose string value
   is that memory; the stored length covers the prefix plus the copied
   text.  A READ_AHEAD equal to CR is special-cased before the
   length is computed.  */
839 save_asm (pfile, token, from, read_ahead)
842 const unsigned char *from;
845 #define _ASM_STR "_asm"
846 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
848 unsigned char *buffer;
849 unsigned int text_len, len;
851 /* ignore read_ahead if it is a CR */
852 if (read_ahead == '\r')
854 len = pfile->buffer->cur - from;
855 /* + _ASM_LEN for the initial '_asm'. */
856 text_len = copy_text_chars (NULL, from, len, read_ahead) + _ASM_LEN;
857 buffer = _cpp_pool_alloc (&pfile->ident_pool, text_len);
859 token->type = CPP_ASM;
860 token->val.str.len = text_len;
861 token->val.str.text = buffer;
863 memcpy (buffer, _ASM_STR, _ASM_LEN);
864 copy_text_chars (buffer + _ASM_LEN, from, len, read_ahead);
867 /* The stored comment includes the comment start and any terminator. */
/* save_comment: FROM marks where the comment text starts in the input
   and pfile->buffer->cur where it ends; LEN is their distance, with a
   special case when a look-ahead character is pending.
   copy_text_chars is called twice, both times with EOF as its
   read-ahead argument: first with a NULL destination to size the
   text, then to copy it starting at the second byte of memory
   obtained from pfile->ident_pool, the first byte being reserved for
   the initial '/'.  TOKEN becomes a CPP_COMMENT token whose string
   value is that memory.  */
869 save_comment (pfile, token, from)
872 const unsigned char *from;
874 unsigned char *buffer;
875 unsigned int text_len, len;
877 len = pfile->buffer->cur - from;
878 /* C++ comments probably (not definitely) have moved past a new
879 line, which we don't want to save in the comment. */
880 if (pfile->buffer->read_ahead != EOF)
882 /* + 1 for the initial '/'. */
883 text_len = copy_text_chars (NULL, from, len, EOF) + 1;
884 buffer = _cpp_pool_alloc (&pfile->ident_pool, text_len);
886 token->type = CPP_COMMENT;
887 token->val.str.len = text_len;
888 token->val.str.text = buffer;
891 copy_text_chars (buffer + 1, from, len, EOF);
894 /* Subroutine of lex_token to handle '%'. A little tricky, since we
895 want to avoid stepping back when lexing %:%X. */
/* lex_percent: the token starts out as CPP_MOD and is upgraded
   according to what follows: CPP_MOD_EQ, or, only when the digraphs
   option is on, the digraph forms CPP_HASH, CPP_PASTE and
   CPP_CLOSE_BRACE, which also get the DIGRAPH flag.  Recognising the
   four-character paste digraph needs two characters of look-ahead.
   The second of them is held in buffer->extra_char: it is cleared when
   it is ':' and the token becomes CPP_PASTE; otherwise '%' is put back
   as read_ahead and the held character is picked up on the next call
   instead of reading the input.  */
897 lex_percent (buffer, result)
903 result->type = CPP_MOD;
904 /* Parsing %:%X could leave an extra character. */
905 if (buffer->extra_char == EOF)
906 c = get_effective_char (buffer);
909 c = buffer->read_ahead = buffer->extra_char;
910 buffer->extra_char = EOF;
914 ACCEPT_CHAR (CPP_MOD_EQ);
915 else if (CPP_OPTION (buffer->pfile, digraphs))
919 result->flags |= DIGRAPH;
920 ACCEPT_CHAR (CPP_HASH);
921 if (get_effective_char (buffer) == '%')
923 buffer->extra_char = get_effective_char (buffer);
924 if (buffer->extra_char == ':')
926 buffer->extra_char = EOF;
927 ACCEPT_CHAR (CPP_PASTE);
930 /* We'll catch the extra_char when we're called back. */
931 buffer->read_ahead = '%';
936 result->flags |= DIGRAPH;
937 ACCEPT_CHAR (CPP_CLOSE_BRACE);
942 /* Subroutine of lex_token to handle '.'. This is tricky, since we
943 want to avoid stepping back when lexing '...' or '.123'. In the
944 latter case we should also set a flag for parse_number. */
/* lex_dot: the character after the period is taken from
   buffer->extra_char when one was held back by an earlier call, and
   from get_effective_char otherwise.  A decimal digit makes the token
   a CPP_NUMBER, parsed by parse_number with its leading-period flag
   set.  Otherwise the token is CPP_DOT unless upgraded: to
   CPP_ELLIPSIS when a further '.' is found through extra_char, or to
   CPP_DOT_STAR for a following '*' when the cplusplus option is on.
   When the ellipsis look-ahead fails, '.' is put back as read_ahead
   and the held character is picked up on the next call.  */
946 lex_dot (pfile, result)
950 cpp_buffer *buffer = pfile->buffer;
953 /* Parsing ..X could leave an extra character. */
954 if (buffer->extra_char == EOF)
955 c = get_effective_char (buffer);
958 c = buffer->read_ahead = buffer->extra_char;
959 buffer->extra_char = EOF;
962 /* All known character sets have 0...9 contiguous. */
963 if (c >= '0' && c <= '9')
965 result->type = CPP_NUMBER;
966 parse_number (pfile, &result->val.str, c, 1);
970 result->type = CPP_DOT;
973 buffer->extra_char = get_effective_char (buffer);
974 if (buffer->extra_char == '.')
976 buffer->extra_char = EOF;
977 ACCEPT_CHAR (CPP_ELLIPSIS);
980 /* We'll catch the extra_char when we're called back. */
981 buffer->read_ahead = '.';
983 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
984 ACCEPT_CHAR (CPP_DOT_STAR);
/* _cpp_lex_token: lexes the next token from pfile->buffer into RESULT.
   RESULT->flags starts from buffer->saved_flags, which is then
   cleared; the lexer position is recorded in pfile->lexer_pos; and the
   first character is taken from buffer->read_ahead when one is
   pending.  The character then selects the handling: whitespace is
   skipped and noted as PREV_WHITE; a newline is passed to
   handle_newline or, inside a directive, ends the directive with
   CPP_EOF; '?' and backslash go through skip_escaped_newlines; digits,
   identifier characters and quotes are handed to parse_number,
   parse_identifier and parse_string; a slash may start a comment,
   which is skipped or saved as a token depending on
   state.save_comments; the remaining cases build operator and
   punctuator tokens, using get_effective_char for look-ahead and
   ACCEPT_CHAR when the look-ahead character belongs to the token.
   The SDCC-specific branch turns an _asm ... _endasm block into a
   single CPP_ASM token when the preproc_asm option is zero.  End of
   input yields CPP_EOF.  A token lexed outside a directive sets
   pfile->mi_state to MI_FAILED.  */
989 _cpp_lex_token (pfile, result)
995 const unsigned char *comment_start;
999 bol = pfile->state.next_bol;
1001 buffer = pfile->buffer;
1002 pfile->state.next_bol = 0;
1003 result->flags = buffer->saved_flags;
1004 buffer->saved_flags = 0;
1006 pfile->lexer_pos.line = buffer->lineno;
1008 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
1010 c = buffer->read_ahead;
1011 if (c == EOF && buffer->cur < buffer->rlimit)
1014 pfile->lexer_pos.col++;
1018 buffer->read_ahead = EOF;
1022 /* Non-empty files should end in a newline. Checking "bol" too
1023 prevents multiple warnings when hitting the EOF more than
1024 once, like in a directive. Don't warn for command line and
1026 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
1027 cpp_pedwarn (pfile, "no newline at end of file");
1028 pfile->state.next_bol = 1;
1029 pfile->skipping = 0; /* In case missing #endif. */
1030 result->type = CPP_EOF;
1031 /* Don't do MI optimisation. */
1034 case ' ': case '\t': case '\f': case '\v': case '\0':
1035 skip_whitespace (pfile, c);
1036 result->flags |= PREV_WHITE;
1039 case '\n': case '\r':
1040 if (!pfile->state.in_directive)
1042 handle_newline (buffer, c);
1044 pfile->lexer_pos.output_line = buffer->lineno;
1045 /* This is a new line, so clear any white space flag.
1046 Newlines in arguments are white space (6.10.3.10);
1047 parse_arg takes care of that. */
1048 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1052 /* Don't let directives spill over to the next line. */
1053 buffer->read_ahead = c;
1054 pfile->state.next_bol = 1;
1055 result->type = CPP_EOF;
1056 /* Don't break; pfile->skipping might be true. */
1061 /* These could start an escaped newline, or '?' a trigraph. Let
1062 skip_escaped_newlines do all the work. */
1064 unsigned int lineno = buffer->lineno;
1066 c = skip_escaped_newlines (buffer, c);
1067 if (lineno != buffer->lineno)
1068 /* We had at least one escaped newline of some sort, and the
1069 next character is in buffer->read_ahead. Update the
1070 token's line and column. */
1073 /* We are either the original '?' or '\\', or a trigraph. */
1074 result->type = CPP_QUERY;
1075 buffer->read_ahead = EOF;
1083 case '0': case '1': case '2': case '3': case '4':
1084 case '5': case '6': case '7': case '8': case '9':
1085 result->type = CPP_NUMBER;
1086 parse_number (pfile, &result->val.str, c, 0);
1090 if (!CPP_OPTION (pfile, dollars_in_ident))
1092 /* Fall through... */
1095 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1096 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1097 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1098 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1100 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1101 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1102 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1103 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1105 result->type = CPP_NAME;
1106 result->val.node = parse_identifier (pfile, c);
1108 /* 'L' may introduce wide characters or strings. */
1109 if (result->val.node == pfile->spec_nodes.n_L)
1111 c = buffer->read_ahead; /* For make_string. */
1112 if (c == '\'' || c == '"')
1114 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1118 /* SDCC _asm specific */
1119 /* handle _asm ... _endasm ; */
1120 else if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1122 int read_ahead = buffer->read_ahead;
1124 comment_start = buffer->cur;
1125 result->type = CPP_ASM;
1126 skip_asm_block (pfile, read_ahead);
1127 /* Save the _asm block as a token in its own right. */
1128 save_asm (pfile, result, comment_start, read_ahead);
1130 /* Convert named operators to their proper types. */
1131 else if (result->val.node->flags & NODE_OPERATOR)
1133 result->flags |= NAMED_OP;
1134 result->type = result->val.node->value.operator;
1140 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1142 parse_string (pfile, result, c);
1146 /* A potential block or line comment. */
1147 comment_start = buffer->cur;
1148 result->type = CPP_DIV;
1149 c = get_effective_char (buffer);
1151 ACCEPT_CHAR (CPP_DIV_EQ);
1152 if (c != '/' && c != '*')
1157 if (skip_block_comment (pfile))
1158 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1159 pfile->lexer_pos.col,
1160 "unterminated comment");
1164 if (!CPP_OPTION (pfile, cplusplus_comments)
1165 && !CPP_IN_SYSTEM_HEADER (pfile))
1168 /* Warn about comments only if pedantically GNUC89, and not
1169 in system headers. */
1170 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1171 && ! buffer->warned_cplusplus_comments)
1174 "C++ style comments are not allowed in ISO C89");
1176 "(this will be reported only once per input file)");
1177 buffer->warned_cplusplus_comments = 1;
1180 /* Skip_line_comment updates buffer->read_ahead. */
1181 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1182 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1183 pfile->lexer_pos.col,
1184 "multi-line comment");
1187 /* Skipping the comment has updated buffer->read_ahead. */
1188 if (!pfile->state.save_comments)
1190 result->flags |= PREV_WHITE;
1194 /* Save the comment as a token in its own right. */
1195 save_comment (pfile, result, comment_start);
1197 when executed with -C option, comments
1198 were included even if they where in skipped #if block.
1199 Applied solution from GCC cpp 3.3.2 */
1203 if (pfile->state.angled_headers)
1205 result->type = CPP_HEADER_NAME;
1206 c = '>'; /* terminator. */
1210 result->type = CPP_LESS;
1211 c = get_effective_char (buffer);
1213 ACCEPT_CHAR (CPP_LESS_EQ);
1216 ACCEPT_CHAR (CPP_LSHIFT);
1217 if (get_effective_char (buffer) == '=')
1218 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1220 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1222 ACCEPT_CHAR (CPP_MIN);
1223 if (get_effective_char (buffer) == '=')
1224 ACCEPT_CHAR (CPP_MIN_EQ);
1226 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1228 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1229 result->flags |= DIGRAPH;
1231 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1233 ACCEPT_CHAR (CPP_OPEN_BRACE);
1234 result->flags |= DIGRAPH;
1239 result->type = CPP_GREATER;
1240 c = get_effective_char (buffer);
1242 ACCEPT_CHAR (CPP_GREATER_EQ);
1245 ACCEPT_CHAR (CPP_RSHIFT);
1246 if (get_effective_char (buffer) == '=')
1247 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1249 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1251 ACCEPT_CHAR (CPP_MAX);
1252 if (get_effective_char (buffer) == '=')
1253 ACCEPT_CHAR (CPP_MAX_EQ);
1258 lex_percent (buffer, result);
1259 if (result->type == CPP_HASH)
1264 lex_dot (pfile, result);
1268 result->type = CPP_PLUS;
1269 c = get_effective_char (buffer);
1271 ACCEPT_CHAR (CPP_PLUS_EQ);
1273 ACCEPT_CHAR (CPP_PLUS_PLUS);
1277 result->type = CPP_MINUS;
1278 c = get_effective_char (buffer);
1281 ACCEPT_CHAR (CPP_DEREF);
1282 if (CPP_OPTION (pfile, cplusplus)
1283 && get_effective_char (buffer) == '*')
1284 ACCEPT_CHAR (CPP_DEREF_STAR);
1287 ACCEPT_CHAR (CPP_MINUS_EQ);
1289 ACCEPT_CHAR (CPP_MINUS_MINUS);
1293 result->type = CPP_MULT;
1294 if (get_effective_char (buffer) == '=')
1295 ACCEPT_CHAR (CPP_MULT_EQ);
1299 result->type = CPP_EQ;
1300 if (get_effective_char (buffer) == '=')
1301 ACCEPT_CHAR (CPP_EQ_EQ);
1305 result->type = CPP_NOT;
1306 if (get_effective_char (buffer) == '=')
1307 ACCEPT_CHAR (CPP_NOT_EQ);
1311 result->type = CPP_AND;
1312 c = get_effective_char (buffer);
1314 ACCEPT_CHAR (CPP_AND_EQ);
1316 ACCEPT_CHAR (CPP_AND_AND);
1320 c = buffer->extra_char; /* Can be set by error condition below. */
1323 buffer->read_ahead = c;
1324 buffer->extra_char = EOF;
1327 c = get_effective_char (buffer);
1331 ACCEPT_CHAR (CPP_PASTE);
1335 result->type = CPP_HASH;
1339 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1340 tokens within the list of arguments that would otherwise act
1341 as preprocessing directives, the behavior is undefined.
1343 This implementation will report a hard error, terminate the
1344 macro invocation, and proceed to process the directive. */
1345 if (pfile->state.parsing_args)
1347 if (pfile->state.parsing_args == 2)
1349 "directives may not be used inside a macro argument");
1351 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1352 buffer->extra_char = buffer->read_ahead;
1353 buffer->read_ahead = '#';
1354 pfile->state.next_bol = 1;
1355 result->type = CPP_EOF;
1357 /* Get whitespace right - newline_in_args sets it. */
1358 if (pfile->lexer_pos.col == 1)
1359 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1363 /* This is the hash introducing a directive. */
1364 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1365 goto done_directive; /* bol still 1. */
1366 /* This is in fact an assembler #. */
1371 result->type = CPP_OR;
1372 c = get_effective_char (buffer);
1374 ACCEPT_CHAR (CPP_OR_EQ);
1376 ACCEPT_CHAR (CPP_OR_OR);
1380 result->type = CPP_XOR;
1381 if (get_effective_char (buffer) == '=')
1382 ACCEPT_CHAR (CPP_XOR_EQ);
1386 result->type = CPP_COLON;
1387 c = get_effective_char (buffer);
1388 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1389 ACCEPT_CHAR (CPP_SCOPE);
1390 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1392 result->flags |= DIGRAPH;
1393 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1397 case '~': result->type = CPP_COMPL; break;
1398 case ',': result->type = CPP_COMMA; break;
1399 case '(': result->type = CPP_OPEN_PAREN; break;
1400 case ')': result->type = CPP_CLOSE_PAREN; break;
1401 case '[': result->type = CPP_OPEN_SQUARE; break;
1402 case ']': result->type = CPP_CLOSE_SQUARE; break;
1403 case '{': result->type = CPP_OPEN_BRACE; break;
1404 case '}': result->type = CPP_CLOSE_BRACE; break;
1405 case ';': result->type = CPP_SEMICOLON; break;
1407 /* @ is a punctuator in Objective C. */
1408 case '@': result->type = CPP_ATSIGN; break;
1412 result->type = CPP_OTHER;
1417 if (pfile->skipping)
1420 /* If not in a directive, this token invalidates controlling macros. */
1421 if (!pfile->state.in_directive)
1422 pfile->mi_state = MI_FAILED;
1425 /* An upper bound on the number of bytes needed to spell a token,
1426 including preceding whitespace. */
/* TOKEN is the token to measure.
   NOTE(review): the embedded line numbers skip values, so the return
   type, braces and the final return statement are not visible here.  */
1428 cpp_token_len (token)
1429 const cpp_token *token;
/* The base length depends on the token's spelling category.  */
1433 switch (TOKEN_SPELL (token))
/* Categories other than the two below contribute no base length.  */
1435 default: len = 0; break;
/* String-spelled tokens: the stored text length.  */
1436 case SPELL_STRING: len = token->val.str.len; break;
/* Identifier tokens: the length of the identifier node.  */
1437 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1439 /* 1 for whitespace, 4 for comment delimiters. NOTE(review): the
   statement that applies this allowance is not visible in this listing. */
/* cpp_spell_token: copies the spelling of TOKEN into BUFFER, choosing
   the method from TOKEN_SPELL (token).  PFILE is handed to cpp_ice when
   the token cannot be spelled.
   NOTE(review): the embedded line numbers skip values; case labels,
   braces and the final return are among the lines not shown.  */
1443 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1444 already contain enough space to hold the token's spelling.
1445 Returns a pointer to the character after the last character
1448 cpp_spell_token (pfile, token, buffer)
1449 cpp_reader *pfile; /* Would be nice to be rid of this... */
1450 const cpp_token *token;
1451 unsigned char *buffer;
1453 switch (TOKEN_SPELL (token))
/* Operator tokens are spelled from a NUL-terminated string.  */
1455 case SPELL_OPERATOR:
1457 const unsigned char *spelling;
/* Digraph-flagged tokens take their spelling from digraph_spellings,
   indexed by the token type's offset from CPP_FIRST_DIGRAPH.  */
1460 if (token->flags & DIGRAPH)
1462 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* NOTE(review): the statement guarded by the NAMED_OP test is not
   shown; confirm which branch the TOKEN_NAME assignment belongs to.  */
1463 else if (token->flags & NAMED_OP)
1466 spelling = TOKEN_NAME (token);
/* Walk the spelling one character at a time up to its terminating NUL.  */
1468 while ((c = *spelling++) != '\0')
/* Copy the identifier node's name and step BUFFER past it.  */
1475 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1476 buffer += NODE_LEN (token->val.node);
/* LEFT and RIGHT are the delimiters and TAG an optional prefix; a zero
   value means nothing is emitted for that position.  */
1481 int left, right, tag;
1482 switch (token->type)
1484 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1485 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1486 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1487 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1488 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1489 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Emit prefix, opening delimiter, the stored text, then the closing
   delimiter.  */
1491 if (tag) *buffer++ = tag;
1492 if (left) *buffer++ = left;
1493 memcpy (buffer, token->val.str.text, token->val.str.len);
1494 buffer += token->val.str.len;
1495 if (right) *buffer++ = right;
/* Emit the single character stored in the token.  */
1500 *buffer++ = token->val.c;
/* Report a token that has no spelling.  */
1504 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1511 /* Returns a token as a null-terminated string. The string is
1512 temporary, and automatically freed later. Useful for diagnostics. */
/* PFILE supplies the ident_pool the text is allocated from; TOKEN is
   the token to spell.
   NOTE(review): the embedded line numbers skip values, so the return
   type, the PFILE declaration and the closing statements are not
   visible here.  */
1514 cpp_token_as_text (pfile, token)
1516 const cpp_token *token;
/* Size the buffer with cpp_token_len's upper bound.  */
1518 unsigned int len = cpp_token_len (token);
1519 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* Spell the token at START; END receives cpp_spell_token's return
   value.  */
1521 end = cpp_spell_token (pfile, token, start);
1527 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name recorded for TYPE in the token_spellings table, cast
   to const char *.  TYPE is used directly as the table index; no range
   check is visible here.  */
1529 cpp_type2name (type)
1530 enum cpp_ttype type;
1532 return (const char *) token_spellings[type].name;
1535 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1536 for efficiency - to avoid double-buffering. Also, outputs a space
1537 if PREV_WHITE is flagged. */
/* TOKEN is the token to write and FP the destination stream.
   NOTE(review): the embedded line numbers skip values; the FP
   declaration, case labels and braces are among the lines not shown.  */
1539 cpp_output_token (token, fp)
1540 const cpp_token *token;
/* Leading-space test; the statement it guards is not shown.  */
1543 if (token->flags & PREV_WHITE)
/* Dispatch on the spelling category.  */
1546 switch (TOKEN_SPELL (token))
1548 case SPELL_OPERATOR:
1550 const unsigned char *spelling;
/* Digraph-flagged tokens take their spelling from digraph_spellings,
   indexed by the token type's offset from CPP_FIRST_DIGRAPH.  */
1552 if (token->flags & DIGRAPH)
1554 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* NOTE(review): the statement guarded by the NAMED_OP test is not
   shown; confirm which branch the TOKEN_NAME assignment belongs to.  */
1555 else if (token->flags & NAMED_OP)
1558 spelling = TOKEN_NAME (token);
/* Write the chosen operator spelling.  */
1560 ufputs (spelling, fp);
/* Write the identifier node's name.  */
1566 ufputs (NODE_NAME (token->val.node), fp);
/* LEFT and RIGHT are the delimiters and TAG an optional prefix; a zero
   value means nothing is written for that position.  */
1571 int left, right, tag;
1572 switch (token->type)
1574 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1575 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1576 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1577 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1578 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1579 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Write prefix, opening delimiter, the stored text (by length, not up
   to a NUL), then the closing delimiter.  */
1581 if (tag) putc (tag, fp);
1582 if (left) putc (left, fp);
1583 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1584 if (right) putc (right, fp);
/* Write the single character stored in the token.  */
1589 putc (token->val.c, fp);
1593 /* An error, most probably. */
1598 /* Compare two tokens. */
/* A and B are the tokens to compare.
   NOTE(review): the embedded line numbers skip values; most case
   labels, the end of the memcmp call and the fall-back return are not
   visible, so the category each return belongs to must be confirmed
   against the full source.  */
1600 _cpp_equiv_tokens (a, b)
1601 const cpp_token *a, *b;
/* Only tokens with identical type and flags are compared further.  */
1603 if (a->type == b->type && a->flags == b->flags)
/* The payload comparison depends on A's spelling category.  */
1604 switch (TOKEN_SPELL (a))
1606 default: /* Keep compiler happy. */
1607 case SPELL_OPERATOR:
1610 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must also share an argument number; any other type
   reaching this return compares equal.  */
1612 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Same node pointer.  */
1614 return a->val.node == b->val.node;
/* Same length first, then the same bytes.  */
1616 return (a->val.str.len == b->val.str.len
1617 && !memcmp (a->val.str.text, b->val.str.text,
/* cpp_can_paste: PFILE supplies the cplusplus and digraphs options that
   gate some results; TOKEN1 and TOKEN2 are the two tokens in order (A
   and B below are their types); *DIGRAPH is set when the result is
   returned as a digraph spelling.
   NOTE(review): the embedded line numbers skip values; the switch
   header, several case labels and the final return are not visible.  */
1624 /* Determine whether two tokens can be pasted together, and if so,
1625 what the resulting token is. Returns CPP_EOF if the tokens cannot
1626 be pasted, or the appropriate type for the merged token if they
1629 cpp_can_paste (pfile, token1, token2, digraph)
1631 const cpp_token *token1, *token2;
1634 enum cpp_ttype a = token1->type, b = token2->type;
1635 int cxx = CPP_OPTION (pfile, cplusplus);
1637 /* Treat named operators as if they were ordinary NAMEs. */
1638 if (token1->flags & NAMED_OP)
1640 if (token2->flags & NAMED_OP)
/* Any type up to CPP_LAST_EQ followed by CPP_EQ maps to the type at the
   same distance from it as CPP_EQ_EQ is from CPP_EQ.  */
1643 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1644 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
/* Pairs producing CPP_RSHIFT, CPP_MAX (C++ only) and CPP_RSHIFT_EQ.  */
1649 if (b == a) return CPP_RSHIFT;
1650 if (b == CPP_QUERY && cxx) return CPP_MAX;
1651 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Pairs producing CPP_LSHIFT, CPP_MIN (C++ only), CPP_LSHIFT_EQ and,
   with digraphs enabled, the two digraphs noted below.  */
1654 if (b == a) return CPP_LSHIFT;
1655 if (b == CPP_QUERY && cxx) return CPP_MIN;
1656 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1657 if (CPP_OPTION (pfile, digraphs))
1660 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1662 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Operators that double when pasted with themselves.  */
1666 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1667 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1668 case CPP_OR: if (b == a) return CPP_OR_OR; break;
/* Pairs producing CPP_MINUS_MINUS and CPP_DEREF.  */
1671 if (b == a) return CPP_MINUS_MINUS;
1672 if (b == CPP_GREATER) return CPP_DEREF;
/* Pairs producing CPP_SCOPE (C++ only) and, with digraphs enabled, the
   digraph noted below.  */
1675 if (b == a && cxx) return CPP_SCOPE;
1676 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1677 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
/* Digraph-only results.  */
1681 if (CPP_OPTION (pfile, digraphs))
1683 if (b == CPP_GREATER)
1684 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1686 {*digraph = 1; return CPP_HASH;} /* %: digraph */
/* C++ only: a following CPP_MULT gives CPP_DEREF_STAR.  */
1690 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
/* C++ only: a following CPP_MULT gives CPP_DOT_STAR; a following number
   gives a number.  */
1693 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1694 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two identical tokens with matching DIGRAPH flags paste to CPP_PASTE,
   which takes over TOKEN1's DIGRAPH flag.  */
1698 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1700 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* Results that are names, or wide character / wide string constants
   when TOKEN1's node is the n_L special node.  */
1704 if (b == CPP_NAME) return CPP_NAME;
1706 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1708 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1710 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
/* Results that are numbers.  */
1714 if (b == CPP_NUMBER) return CPP_NUMBER;
1715 if (b == CPP_NAME) return CPP_NUMBER;
1716 if (b == CPP_DOT) return CPP_NUMBER;
1717 /* Numbers cannot have length zero, so this is safe. */
1718 if ((b == CPP_PLUS || b == CPP_MINUS)
1719 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1730 /* Returns nonzero if a space should be inserted to avoid an
1731 accidental token paste for output. For simplicity, it is
1732 conservative, and occasionally advises a space where one is not
1733 needed, e.g. "." and ".2". */
/* PFILE supplies the objc option and is passed to name_p; TOKEN1 and
   TOKEN2 are the adjacent tokens, in that order.
   NOTE(review): the embedded line numbers skip values; the declaration
   and default value of C, the statements guarded by the NAMED_OP
   tests, the switch header and the final return are not visible.  */
1736 cpp_avoid_paste (pfile, token1, token2)
1738 const cpp_token *token1, *token2;
1740 enum cpp_ttype a = token1->type, b = token2->type;
1743 if (token1->flags & NAMED_OP)
1745 if (token2->flags & NAMED_OP)
/* C becomes the first character of TOKEN2's spelling when TOKEN2 is a
   digraph or an operator.  */
1749 if (token2->flags & DIGRAPH)
1750 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1751 else if (token_spellings[b].category == SPELL_OPERATOR)
1752 c = token_spellings[b].name[0];
1754 /* Quickly get everything that can paste with an '='. */
1755 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Otherwise decide from TOKEN1's type together with C or TOKEN2's
   type.  */
1760 case CPP_GREATER: return c == '>' || c == '?';
1761 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1762 case CPP_PLUS: return c == '+';
1763 case CPP_MINUS: return c == '-' || c == '>';
1764 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1765 case CPP_MOD: return c == ':' || c == '>';
1766 case CPP_AND: return c == '&';
1767 case CPP_OR: return c == '|';
1768 case CPP_COLON: return c == ':' || c == '>';
1769 case CPP_DEREF: return c == '*';
1770 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1771 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1772 case CPP_NAME: return ((b == CPP_NUMBER
1773 && name_p (pfile, &token2->val.str))
1775 || b == CPP_CHAR || b == CPP_STRING); /* L */
1776 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1777 || c == '.' || c == '+' || c == '-');
/* Only with the objc option: an '@' followed by a name or a string.  */
1778 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1779 && token1->val.c == '@'
1780 && (b == CPP_NAME || b == CPP_STRING));
1787 /* Output all the remaining tokens on the current line, and a newline
1788 character, to FP. Leading whitespace is removed. */
/* PFILE is the reader that cpp_get_token fetches tokens from.
   NOTE(review): the embedded line numbers skip values; the parameter
   and local declarations, braces and the statement writing the newline
   are not visible.  */
1790 cpp_output_line (pfile, fp)
/* Fetch the first token and clear its PREV_WHITE flag before it is
   passed to cpp_output_token.  */
1796 cpp_get_token (pfile, &token);
1797 token.flags &= ~PREV_WHITE;
/* Write tokens until CPP_EOF is returned.  */
1798 while (token.type != CPP_EOF)
1800 cpp_output_token (&token, fp);
1801 cpp_get_token (pfile, &token);
1807 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header is not visible in this listing (the
   embedded line numbers skip from 1807 to 1812); other routines in the
   file call hex_digit_value, which is presumably this routine - confirm.
   The value returned for decimal digits and the handling of non-hex
   input are likewise on lines not shown.  */
/* 'a' through 'f' give 10 through 15.  */
1812 if (c >= 'a' && c <= 'f')
1813 return c - 'a' + 10;
/* 'A' through 'F' give 10 through 15.  */
1814 if (c >= 'A' && c <= 'F')
1815 return c - 'A' + 10;
/* Decimal digits; the returned expression is on a line not shown.  */
1816 if (c >= '0' && c <= '9')
1821 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1822 failure if cpplib is not parsing C++ or C99. Such failure is
1823 silent, and no variables are updated. Otherwise returns 0, and
1824 warns if -Wtraditional.
1826 [lex.charset]: The character designated by the universal character
1827 name \UNNNNNNNN is that character whose character short name in
1828 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1829 universal character name \uNNNN is that character whose character
1830 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1831 for a universal character name is less than 0x20 or in the range
1832 0x7F-0x9F (inclusive), or if the universal character name
1833 designates a character in the basic source character set, then the
1834 program is ill-formed.
1836 We assume that wchar_t is Unicode, so we don't need to do any
1837 mapping. Is this ever wrong?
1839 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1840 LIMIT is the end of the string or charconst. PSTR is updated to
1841 point after the UCS on return, and the UCS is written into PC. */
/* NOTE(review): the embedded line numbers skip values; the return type,
   the PFILE and PC declarations, braces, several statements inside the
   digit loop and the final stores and return are not visible.  */
1844 maybe_read_ucs (pfile, pstr, limit, pc)
1846 const unsigned char **pstr;
1847 const unsigned char *limit;
/* P scans the input, CODE accumulates the value, and C starts out as
   the character read through PC.  */
1850 const unsigned char *p = *pstr;
1851 unsigned int code = 0;
1852 unsigned int c = *pc, length;
1854 /* Only attempt to interpret a UCS for C++ and C99. */
1855 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1858 if (CPP_WTRADITIONAL (pfile))
1859 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* The 'u' form takes four hex digits, the other form eight.  */
1861 length = (c == 'u' ? 4: 8);
/* Fewer bytes remain before LIMIT than the form requires.  */
1863 if ((size_t) (limit - p) < length)
1865 cpp_error (pfile, "incomplete universal-character-name");
1866 /* Skip to the end to avoid more diagnostics. */
/* Consume digits, counting LENGTH down; LENGTH reaches zero only if the
   loop runs to completion.  */
1871 for (; length; length--, p++)
/* Shift the next hex digit into CODE.  */
1875 code = (code << 4) + hex_digit_value (c);
1879 "non-hex digit '%c' in universal-character-name", c);
1880 /* We shouldn't skip in case there are multibyte chars. */
1886 #ifdef TARGET_EBCDIC
1887 cpp_error (pfile, "universal-character-name on EBCDIC target");
1888 code = 0x3f; /* EBCDIC invalid character */
1890 /* True extended characters are OK. */
1892 && !(code & 0x80000000)
1893 && !(code >= 0xD800 && code <= 0xDFFF))
1895 /* The standard permits $, @ and ` to be specified as UCNs. We use
1896 hex escapes so that this also works with EBCDIC hosts. */
1897 else if (code == 0x24 || code == 0x40 || code == 0x60)
1899 /* Don't give another error if one occurred above. */
1900 else if (length == 0)
1901 cpp_error (pfile, "universal-character-name out of range");
1909 /* Interpret an escape sequence, and return its value. PSTR points to
1910 the input pointer, which is just after the backslash. LIMIT is how
1911 much text we have. MASK is a bitmask for the precision for the
1912 destination type (char or wchar_t). TRADITIONAL, if true, does not
1913 interpret escapes that did not exist in traditional C.
1915 Handles all relevant diagnostics. */
/* NOTE(review): the embedded line numbers skip values; the return type,
   some declarations, the switch header, several case labels, the loop
   headers and the final stores and return are not visible.  */
1918 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1920 const unsigned char **pstr;
1921 const unsigned char *limit;
1922 unsigned HOST_WIDE_INT mask;
/* STR is a working copy of *PSTR; C is the first character read through
   it.  */
1926 const unsigned char *str = *pstr;
1927 unsigned int c = *str++;
/* Characters that keep their own value.  */
1931 case '\\': case '\'': case '"': case '?': break;
/* Escapes mapped to the corresponding TARGET_* values.  */
1932 case 'b': c = TARGET_BS; break;
1933 case 'f': c = TARGET_FF; break;
1934 case 'n': c = TARGET_NEWLINE; break;
1935 case 'r': c = TARGET_CR; break;
1936 case 't': c = TARGET_TAB; break;
1937 case 'v': c = TARGET_VT; break;
1939 case '(': case '{': case '[': case '%':
1940 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1941 '\%' is used to prevent SCCS from getting confused. */
1942 unknown = CPP_PEDANTIC (pfile);
1946 if (CPP_WTRADITIONAL (pfile))
1947 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1953 if (CPP_PEDANTIC (pfile))
1954 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are delegated to maybe_read_ucs; its return
   value is stored in UNKNOWN.  */
1959 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1963 if (CPP_WTRADITIONAL (pfile))
1964 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* I accumulates the hex value; OVERFLOW collects bits lost by shifting.  */
1968 unsigned int i = 0, overflow = 0;
1969 int digits_found = 0;
/* Nonzero bits are added to OVERFLOW when shifting I left by four and
   back does not restore it.  */
1977 overflow |= i ^ (i << 4 >> 4);
1978 i = (i << 4) + hex_digit_value (c);
1983 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit MASK.  */
1985 if (overflow | (i != (i & mask)))
1987 cpp_pedwarn (pfile, "hex escape sequence out of range");
1994 case '0': case '1': case '2': case '3':
1995 case '4': case '5': case '6': case '7':
/* I starts from the first octal digit.  */
1997 unsigned int i = c - '0';
/* Read further octal digits while input remains and COUNT allows.  */
2000 while (str < limit && ++count < 3)
2003 if (c < '0' || c > '7')
2006 i = (i << 3) + c - '0';
/* Out of range if the value does not fit MASK.  */
2009 if (i != (i & mask))
2011 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two report forms for an unknown escape: as a character, or as a
   three-digit octal code; the test choosing between them is not shown.  */
2026 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
2028 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
2032 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Fall back to the plain CHAR_TYPE_SIZE / WCHAR_TYPE_SIZE values when
   the MAX_* sizes are not already defined.
   NOTE(review): the matching #endif lines are not visible in this
   listing.  */
2038 #ifndef MAX_CHAR_TYPE_SIZE
2039 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
2042 #ifndef MAX_WCHAR_TYPE_SIZE
2043 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
2046 /* Interpret a (possibly wide) character constant in TOKEN.
2047 WARN_MULTI warns about multi-character charconsts, if not
2048 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2049 that did not exist in traditional C. PCHARS_SEEN points to a
2050 variable that is filled in with the number of characters seen. */
/* NOTE(review): the embedded line numbers skip values; the return type,
   some declarations, the main loop header, several branches and the
   final return are not visible.  */
2052 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2054 const cpp_token *token;
2057 unsigned int *pchars_seen;
/* STR walks the constant's stored text; LIMIT is one past its end.  */
2059 const unsigned char *str = token->val.str.text;
2060 const unsigned char *limit = str + token->val.str.len;
2061 unsigned int chars_seen = 0;
2062 unsigned int width, max_chars, c;
2063 unsigned HOST_WIDE_INT mask;
2064 HOST_WIDE_INT result = 0;
2066 #ifdef MULTIBYTE_CHARS
2067 (void) local_mbtowc (NULL, NULL, 0);
2070 /* Width in bits. */
2071 if (token->type == CPP_CHAR)
2072 width = MAX_CHAR_TYPE_SIZE;
2074 width = MAX_WCHAR_TYPE_SIZE;
/* When WIDTH is narrower than a HOST_WIDE_INT, MASK covers the low
   WIDTH bits; the other case is on a line not shown.  */
2076 if (width < HOST_BITS_PER_WIDE_INT)
2077 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* How many WIDTH-bit characters fit in a HOST_WIDE_INT.  */
2080 max_chars = HOST_BITS_PER_WIDE_INT / width;
2084 #ifdef MULTIBYTE_CHARS
2088 char_len = local_mbtowc (&wc, str, limit - str);
2091 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which advances STR.  */
2104 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2106 #ifdef MAP_CHARACTER
2108 c = MAP_CHARACTER (c);
2111 /* Merge character into result; ignore excess chars. */
2112 if (++chars_seen <= max_chars)
/* Shift the earlier characters up by WIDTH and add the new one in the
   low bits.  */
2114 if (width < HOST_BITS_PER_WIDE_INT)
2115 result = (result << width) | (c & mask);
/* Diagnose empty, over-long and multi-character constants.  */
2121 if (chars_seen == 0)
2122 cpp_error (pfile, "empty character constant");
2123 else if (chars_seen > max_chars)
2125 chars_seen = max_chars;
2126 cpp_warning (pfile, "character constant too long");
2128 else if (chars_seen > 1 && !traditional && warn_multi)
2129 cpp_warning (pfile, "multi-character character constant");
2131 /* If char type is signed, sign-extend the constant. The
2132 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2133 if (token->type == CPP_CHAR && chars_seen)
/* NBITS is the total width of the characters kept.  This inner MASK
   shadows the outer one and covers the low NBITS bits.  */
2135 unsigned int nbits = chars_seen * width;
2136 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* True when __CHAR_UNSIGNED__ is a macro or the top bit of the
   NBITS-wide value is clear; the statements this selects are not
   shown.  */
2138 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2139 || ((result >> (nbits - 1)) & 1) == 0)
/* Report the number of characters counted to the caller.  */
2145 *pchars_seen = chars_seen;
/* Default alignment used by the pool routines: the offset of member U
   within struct dummy.
   NOTE(review): struct dummy's definition is not visible in this
   listing.  */
2161 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* chunk_suitable: nonzero when CHUNK is non-null, is not POOL's locked
   chunk, and spans at least twice SIZE bytes from base to limit.
   NOTE(review): the embedded line numbers skip values; the return type,
   the parameter declarations and the end of the comment below are not
   visible.  */
2164 chunk_suitable (pool, chunk, size)
2169 /* Being at least twice SIZE means we can use memcpy in
2170 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2172 return (chunk && pool->locked != chunk
2173 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2176 /* Returns the end of the new pool. PTR points to a char in the old
2177 pool, and is updated to point to the same char in the new pool. */
/* POOL is the pool to move to its next chunk and LEN the additional
   room wanted.
   NOTE(review): the embedded line numbers skip values; the return type,
   the POOL and LEN declarations, braces and some statements are not
   visible.  */
2179 _cpp_next_chunk (pool, len, ptr)
2182 unsigned char **ptr;
/* Candidate: the chunk following the current one.  */
2184 cpp_chunk *chunk = pool->cur->next;
2186 /* LEN is the minimum size we want in the new pool. */
2187 len += POOL_ROOM (pool);
/* If the candidate is not suitable, allocate a new chunk and link it in
   directly after the current one.  */
2188 if (! chunk_suitable (pool, chunk, len))
2190 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2192 chunk->next = pool->cur->next;
2193 pool->cur->next = chunk;
2196 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR; the test guarding
   this dereference is presumably on a line not shown - confirm.  */
2198 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy POOL_ROOM (pool) bytes from the old front to the base of the new
   chunk, whose front is then reset to its base.  */
2200 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2201 chunk->front = chunk->base;
2204 return POOL_LIMIT (pool);
/* NOTE(review): the function header is not visible in this listing (the
   embedded line numbers skip values before 2211); other routines in the
   file call new_chunk with a size, which is presumably this routine -
   confirm.  The visible statements allocate one block that holds SIZE
   bytes of data followed by the cpp_chunk descriptor.  */
2211 unsigned char *base;
/* Align SIZE with POOL_ALIGN, then allocate the data area plus room for
   the descriptor.  */
2214 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2215 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2216 /* Put the chunk descriptor at the end. Then chunk overruns will
2217 cause obvious chaos. */
2218 result = (cpp_chunk *) (base + size);
/* The data area runs from BASE to BASE + SIZE, with FRONT at its
   start.  */
2219 result->base = base;
2220 result->front = base;
2221 result->limit = base + size;
/* _cpp_init_pool: records an alignment in POOL and gives it a first
   chunk of SIZE bytes obtained from new_chunk.
   NOTE(review): the embedded line numbers skip values; the return type,
   the POOL declaration, the conditions guarding several statements and
   any use of TEMP are not visible.  */
2228 _cpp_init_pool (pool, size, align, temp)
2230 unsigned int size, align, temp;
/* Default alignment; the condition selecting it is not shown.  */
2233 align = DEFAULT_ALIGNMENT;
/* Nonzero when ALIGN has more than one bit set, i.e. is not a power of
   two; the action taken is not shown.  */
2234 if (align & (align - 1))
2236 pool->align = align;
2237 pool->cur = new_chunk (size);
/* Make the chunk its own successor; the condition guarding this is not
   shown.  */
2241 pool->cur->next = pool->cur;
/* _cpp_lock_pool: increments POOL's lock count; when the count was zero
   beforehand, the current chunk is recorded as the locked chunk.
   NOTE(review): the embedded line numbers skip values; the return type,
   the POOL declaration and anything after the assignment are not
   visible.  */
2245 _cpp_lock_pool (pool)
2248 if (pool->locks++ == 0)
2249 pool->locked = pool->cur;
/* _cpp_unlock_pool: decrements POOL's lock count.
   NOTE(review): the embedded line numbers skip values; the statement
   executed when the count reaches zero is not visible.  */
2253 _cpp_unlock_pool (pool)
2256 if (--pool->locks == 0)
/* _cpp_free_pool: walks POOL's chunks starting from the current one.
   NOTE(review): the embedded line numbers skip values; the loop body
   (presumably where each chunk is released - confirm) is not visible.  */
2261 _cpp_free_pool (pool)
2264 cpp_chunk *chunk = pool->cur, *next;
/* Stop at a null chunk or on arriving back at pool->cur.  */
2272 while (chunk && chunk != pool->cur);
2275 /* Reserve LEN bytes from a memory pool. */
/* Returns POOL_FRONT (pool) once enough room is available.
   NOTE(review): the embedded line numbers skip values; the return type
   and the parameter declarations are not visible.  */
2277 _cpp_pool_reserve (pool, len)
/* Align LEN to the pool's alignment.  */
2281 len = POOL_ALIGN (len, pool->align);
/* Not enough room in the current chunk: move to the next one, passing 0
   for the pointer argument.  */
2282 if (len > (unsigned int) POOL_ROOM (pool))
2283 _cpp_next_chunk (pool, len, 0);
2285 return POOL_FRONT (pool);
2288 /* Allocate LEN bytes from a memory pool. */
/* NOTE(review): the embedded line numbers skip values; the return type,
   the parameter declarations and the return statement are not
   visible.  */
2290 _cpp_pool_alloc (pool, len)
/* Reserve the space, then commit LEN bytes with POOL_COMMIT.  */
2294 unsigned char *result = _cpp_pool_reserve (pool, len);
2296 POOL_COMMIT (pool, len);