1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 /* MULTIBYTE_CHARS support only works for native compilers.
30 ??? Ideally what we want is to model widechar support after
31 the current floating point support. */
33 #undef MULTIBYTE_CHARS
36 #ifdef MULTIBYTE_CHARS
41 /* Tokens with SPELL_STRING store their spelling in the token list,
42 and its length in token->val.str.len. */
55 enum spell_type category;
56 const unsigned char *name;
59 static const unsigned char *const digraph_spellings[] =
60 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
62 #define OP(e, s) { SPELL_OPERATOR, U s },
63 #define TK(e, s) { s, U STRINGX (e) },
64 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
68 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
69 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
70 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
72 static void handle_newline PARAMS ((cpp_reader *));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
74 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
76 static int skip_asm_block PARAMS ((cpp_reader *));
77 static int skip_block_comment PARAMS ((cpp_reader *));
78 static int skip_line_comment PARAMS ((cpp_reader *));
79 static void adjust_column PARAMS ((cpp_reader *));
80 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
81 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
82 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
84 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
85 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
86 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
87 static void unterminated PARAMS ((cpp_reader *, int));
88 static bool trigraph_p PARAMS ((cpp_reader *));
89 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
90 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
91 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
92 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
93 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
94 const unsigned char *, unsigned int *));
95 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
97 static unsigned int hex_digit_value PARAMS ((unsigned int));
98 static _cpp_buff *new_buff PARAMS ((size_t));
102 Compares the token TOKEN to the NUL-terminated string STRING.
103 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
105 cpp_ideq (token, string)
106 const cpp_token *token;
109 if (token->type != CPP_NAME)
112 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
115 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
116 Returns with buffer->cur pointing to the character immediately
117 following the newline (combination). */
119 handle_newline (pfile)
122 cpp_buffer *buffer = pfile->buffer;
124 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
125 only accept CR-LF; maybe we should fall back to that behaviour? */
126 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 buffer->col_adjust = 0;
134 /* Subroutine of skip_escaped_newlines; called when a 3-character
135 sequence beginning with "??" is encountered. buffer->cur points to
138 Warn if necessary, and returns true if the sequence forms a
139 trigraph and the trigraph should be honoured. */
144 cpp_buffer *buffer = pfile->buffer;
145 cppchar_t from_char = buffer->cur[1];
148 if (!_cpp_trigraph_map[from_char])
151 accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
157 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
158 "trigraph ??%c converted to %c",
160 (int) _cpp_trigraph_map[from_char]);
161 else if (buffer->cur != buffer->last_Wtrigraphs)
163 buffer->last_Wtrigraphs = buffer->cur;
164 cpp_warning_with_line (pfile, pfile->line,
165 CPP_BUF_COL (buffer) - 1,
166 "trigraph ??%c ignored", (int) from_char);
173 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
174 lie in buffer->cur[-1]. Returns the next byte, which will be in
175 buffer->cur[-1]. This routine performs preprocessing stages 1 and
176 2 of the ISO C standard. */
178 skip_escaped_newlines (pfile)
181 cpp_buffer *buffer = pfile->buffer;
182 cppchar_t next = buffer->cur[-1];
184 /* Only do this if we apply stages 1 and 2. */
185 if (!buffer->from_stage3)
187 const unsigned char *saved_cur;
194 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
197 /* Translate the trigraph. */
198 next = _cpp_trigraph_map[buffer->cur[1]];
204 if (buffer->cur == buffer->rlimit)
207 /* We have a backslash, and room for at least one more
208 character. Skip horizontal whitespace. */
209 saved_cur = buffer->cur;
211 next1 = *buffer->cur++;
212 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
214 if (!is_vspace (next1))
216 buffer->cur = saved_cur;
220 if (saved_cur != buffer->cur - 1
221 && !pfile->state.lexing_comment)
222 cpp_warning (pfile, "backslash and newline separated by space");
224 handle_newline (pfile);
225 buffer->backup_to = buffer->cur;
226 if (buffer->cur == buffer->rlimit)
228 cpp_pedwarn (pfile, "backslash-newline at end of file");
232 next = *buffer->cur++;
234 while (next == '\\' || next == '?');
240 /* Obtain the next character, after trigraph conversion and skipping
241 an arbitrarily long string of escaped newlines. The common case of
242 no trigraphs or escaped newlines falls through quickly. On return,
243 buffer->backup_to points to where to return to if the character is
244 not to be processed. */
246 get_effective_char (pfile)
250 cpp_buffer *buffer = pfile->buffer;
252 buffer->backup_to = buffer->cur;
253 next = *buffer->cur++;
254 if (__builtin_expect (next == '?' || next == '\\', 0))
255 next = skip_escaped_newlines (pfile);
260 /* SDCC _asm specific */
261 /* Skip an _asm ... _endasm block. We find the end of the comment by
262 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
265 skip_asm_block (pfile)
268 #define _ENDASM_STR "endasm"
269 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
271 cpp_buffer *buffer = pfile->buffer;
276 pfile->state.lexing_comment = 1;
277 while (buffer->cur != buffer->rlimit)
279 prev_space = is_space(c);
283 /* FIXME: For speed, create a new character class of characters
284 of interest inside block comments. */
285 if (c == '?' || c == '\\')
286 c = skip_escaped_newlines (pfile);
288 if (prev_space && c == '_')
290 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
291 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
293 buffer->cur += _ENDASM_LEN;
298 else if (is_vspace (c))
300 prev_space = is_space(c);
301 handle_newline (pfile);
305 adjust_column (pfile);
308 pfile->state.lexing_comment = 0;
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
316 skip_block_comment (pfile)
319 cpp_buffer *buffer = pfile->buffer;
320 cppchar_t c = EOF, prevc = EOF;
322 pfile->state.lexing_comment = 1;
323 while (buffer->cur != buffer->rlimit)
325 prevc = c, c = *buffer->cur++;
327 /* FIXME: For speed, create a new character class of characters
328 of interest inside block comments. */
329 if (c == '?' || c == '\\')
330 c = skip_escaped_newlines (pfile);
332 /* People like decorating comments with '*', so check for '/'
333 instead for efficiency. */
339 /* Warn about potential nested comments, but not if the '/'
340 comes immediately before the true comment delimiter.
341 Don't bother to get it right across escaped newlines. */
342 if (CPP_OPTION (pfile, warn_comments)
343 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
344 cpp_warning_with_line (pfile,
345 pfile->line, CPP_BUF_COL (buffer),
346 "\"/*\" within comment");
348 else if (is_vspace (c))
349 handle_newline (pfile);
351 adjust_column (pfile);
354 pfile->state.lexing_comment = 0;
355 return c != '/' || prevc != '*';
358 /* Skip a C++ line comment, leaving buffer->cur pointing to the
359 terminating newline. Handles escaped newlines. Returns non-zero
360 if a multiline comment. */
362 skip_line_comment (pfile)
365 cpp_buffer *buffer = pfile->buffer;
366 unsigned int orig_line = pfile->line;
369 pfile->state.lexing_comment = 1;
372 if (buffer->cur == buffer->rlimit)
376 if (c == '?' || c == '\\')
377 c = skip_escaped_newlines (pfile);
379 while (!is_vspace (c));
381 /* Step back over the newline, except at EOF. */
385 pfile->state.lexing_comment = 0;
386 return orig_line != pfile->line;
389 /* pfile->buffer->cur is one beyond the \t character. Update
390 col_adjust so we track the column correctly. */
392 adjust_column (pfile)
395 cpp_buffer *buffer = pfile->buffer;
396 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
398 /* Round it up to multiple of the tabstop, but subtract 1 since the
399 tab itself occupies a character position. */
400 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
401 - col % CPP_OPTION (pfile, tabstop)) - 1;
404 /* Skips whitespace, saving the next non-whitespace character.
405 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
406 tokens might be assigned an incorrect column. */
408 skip_whitespace (pfile, c)
412 cpp_buffer *buffer = pfile->buffer;
413 unsigned int warned = 0;
417 /* Horizontal space always OK. */
421 adjust_column (pfile);
422 /* Just \f \v or \0 left. */
425 if (buffer->cur - 1 == buffer->rlimit)
429 cpp_warning (pfile, "null character(s) ignored");
433 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
434 cpp_pedwarn_with_line (pfile, pfile->line,
435 CPP_BUF_COL (buffer),
436 "%s in preprocessing directive",
437 c == '\f' ? "form feed" : "vertical tab");
441 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
442 while (is_nvspace (c));
448 /* See if the characters of a number token are valid in a name (no
451 name_p (pfile, string)
453 const cpp_string *string;
457 for (i = 0; i < string->len; i++)
458 if (!is_idchar (string->text[i]))
464 /* Parse an identifier, skipping embedded backslash-newlines. This is
465 a critical inner loop. The common case is an identifier which has
466 not been split by backslash-newline, does not contain a dollar
467 sign, and has already been scanned (roughly 10:1 ratio of
468 seen:unseen identifiers in normal code; the distribution is
469 Poisson-like). Second most common case is a new identifier, not
470 split and no dollar sign. The other possibilities are rare and
471 have been relegated to parse_identifier_slow. */
472 static cpp_hashnode *
473 parse_identifier (pfile)
476 cpp_hashnode *result;
479 /* Fast-path loop. Skim over a normal identifier.
480 N.B. ISIDNUM does not include $. */
481 cur = pfile->buffer->cur;
482 while (ISIDNUM (*cur))
485 /* Check for slow-path cases. */
486 if (*cur == '?' || *cur == '\\' || *cur == '$')
487 result = parse_identifier_slow (pfile, cur);
490 const U_CHAR *base = pfile->buffer->cur - 1;
491 result = (cpp_hashnode *)
492 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
493 pfile->buffer->cur = cur;
496 /* Rarely, identifiers require diagnostics when lexed.
497 XXX Has to be forced out of the fast path. */
498 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
499 && !pfile->state.skipping, 0))
501 /* It is allowed to poison the same identifier twice. */
502 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
503 cpp_error (pfile, "attempt to use poisoned \"%s\"",
506 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
507 replacement list of a variadic macro. */
508 if (result == pfile->spec_nodes.n__VA_ARGS__
509 && !pfile->state.va_args_ok)
511 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
517 /* Slow path. This handles identifiers which have been split, and
518 identifiers which contain dollar signs. The part of the identifier
519 from PFILE->buffer->cur-1 to CUR has already been scanned. */
520 static cpp_hashnode *
521 parse_identifier_slow (pfile, cur)
525 cpp_buffer *buffer = pfile->buffer;
526 const U_CHAR *base = buffer->cur - 1;
527 struct obstack *stack = &pfile->hash_table->stack;
528 unsigned int c, saw_dollar = 0, len;
530 /* Copy the part of the token which is known to be okay. */
531 obstack_grow (stack, base, cur - base);
533 /* Now process the part which isn't. We are looking at one of
534 '$', '\\', or '?' on entry to this loop. */
539 while (is_idchar (c))
541 obstack_1grow (stack, c);
549 /* Potential escaped newline? */
550 buffer->backup_to = buffer->cur - 1;
551 if (c != '?' && c != '\\')
553 c = skip_escaped_newlines (pfile);
555 while (is_idchar (c));
557 /* Step back over the unwanted char. */
560 /* $ is not an identifier character in the standard, but is commonly
561 accepted as an extension. Don't warn about it in skipped
562 conditional blocks. */
563 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
564 cpp_pedwarn (pfile, "'$' character(s) in identifier");
566 /* Identifiers are null-terminated. */
567 len = obstack_object_size (stack);
568 obstack_1grow (stack, '\0');
570 return (cpp_hashnode *)
571 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
574 /* Parse a number, beginning with character C, skipping embedded
575 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
576 before C. Place the result in NUMBER. */
578 parse_number (pfile, number, c, leading_period)
584 cpp_buffer *buffer = pfile->buffer;
585 unsigned char *dest, *limit;
587 dest = BUFF_FRONT (pfile->u_buff);
588 limit = BUFF_LIMIT (pfile->u_buff);
590 /* Place a leading period. */
595 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
596 dest = BUFF_FRONT (pfile->u_buff);
597 limit = BUFF_LIMIT (pfile->u_buff);
606 /* Need room for terminating null. */
607 if ((size_t) (limit - dest) < 2)
609 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
610 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
611 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
612 limit = BUFF_LIMIT (pfile->u_buff);
618 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
620 /* Potential escaped newline? */
621 buffer->backup_to = buffer->cur - 1;
622 if (c != '?' && c != '\\')
624 c = skip_escaped_newlines (pfile);
626 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
628 /* Step back over the unwanted char. */
631 /* Null-terminate the number. */
634 number->text = BUFF_FRONT (pfile->u_buff);
635 number->len = dest - number->text;
636 BUFF_FRONT (pfile->u_buff) = dest + 1;
639 /* Subroutine of parse_string. Emits error for unterminated strings. */
641 unterminated (pfile, term)
645 cpp_error (pfile, "missing terminating %c character", term);
647 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
649 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
650 "possible start of unterminated string literal");
655 /* Subroutine of parse_string. */
657 unescaped_terminator_p (pfile, dest)
659 const unsigned char *dest;
661 const unsigned char *start, *temp;
663 /* In #include-style directives, terminators are not escapeable. */
664 if (pfile->state.angled_headers)
667 start = BUFF_FRONT (pfile->u_buff);
669 /* An odd number of consecutive backslashes represents an escaped
671 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
674 return ((dest - temp) & 1) == 0;
677 /* Parses a string, character constant, or angle-bracketed header file
678 name. Handles embedded trigraphs and escaped newlines. The stored
679 string is guaranteed NUL-terminated, but it is not guaranteed that
680 this is the first NUL since embedded NULs are preserved.
681 Multi-line strings are allowed, but they are deprecated.
683 When this function returns, buffer->cur points to the next
684 character to be processed. */
686 parse_string (pfile, token, terminator)
689 cppchar_t terminator;
691 cpp_buffer *buffer = pfile->buffer;
692 unsigned char *dest, *limit;
694 bool warned_nulls = false, warned_multi = false;
696 dest = BUFF_FRONT (pfile->u_buff);
697 limit = BUFF_LIMIT (pfile->u_buff);
701 /* We need room for another char, possibly the terminating NUL. */
702 if ((size_t) (limit - dest) < 1)
704 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
705 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
706 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
707 limit = BUFF_LIMIT (pfile->u_buff);
710 /* Handle trigraphs, escaped newlines etc. */
712 if (c == '?' || c == '\\')
713 c = skip_escaped_newlines (pfile);
717 if (unescaped_terminator_p (pfile, dest))
720 else if (is_vspace (c))
722 /* In assembly language, silently terminate string and
723 character literals at end of line. This is a kludge
724 around not knowing where comments are. */
725 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
731 /* Character constants and header names may not extend over
732 multiple lines. In Standard C, neither may strings.
733 Unfortunately, we accept multiline strings as an
734 extension, except in #include family directives. */
735 if (terminator != '"' || pfile->state.angled_headers)
737 unterminated (pfile, terminator);
745 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
748 if (pfile->mls_line == 0)
750 pfile->mls_line = token->line;
751 pfile->mls_col = token->col;
754 handle_newline (pfile);
759 if (buffer->cur - 1 == buffer->rlimit)
761 unterminated (pfile, terminator);
768 cpp_warning (pfile, "null character(s) preserved in literal");
777 token->val.str.text = BUFF_FRONT (pfile->u_buff);
778 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
779 BUFF_FRONT (pfile->u_buff) = dest + 1;
782 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
783 comment blocks (when executed with -C option) and
784 _asm (SDCPP specific) blocks */
786 /* Count and copy characters from src to dest, excluding CRs:
787 CRs are automatically generated, because the output is
788 opened in TEXT mode. If dest == NULL, only count chars */
790 copy_text_chars (dest, src, len)
798 for (p = src; p != src + len; ++p)
813 /* SDCC _asm specific */
814 /* The stored comment includes the comment start and any terminator. */
816 save_asm (pfile, token, from)
819 const unsigned char *from;
821 #define _ASM_STR "_asm"
822 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
824 unsigned char *buffer;
825 unsigned int text_len, len;
827 len = pfile->buffer->cur - from;
828 /* + _ASM_LEN for the initial '_asm'. */
829 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
830 buffer = _cpp_unaligned_alloc (pfile, text_len);
833 token->type = CPP_ASM;
834 token->val.str.len = text_len;
835 token->val.str.text = buffer;
837 memcpy (buffer, _ASM_STR, _ASM_LEN);
838 copy_text_chars (buffer + _ASM_LEN, from, len);
841 /* The stored comment includes the comment start and any terminator. */
843 save_comment (pfile, token, from)
846 const unsigned char *from;
848 unsigned char *buffer;
851 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
853 /* C++ comments probably (not definitely) have moved past a new
854 line, which we don't want to save in the comment. */
855 if (is_vspace (pfile->buffer->cur[-1]))
857 buffer = _cpp_unaligned_alloc (pfile, len);
859 token->type = CPP_COMMENT;
860 token->val.str.len = len;
861 token->val.str.text = buffer;
864 copy_text_chars (buffer + 1, from, len);
867 /* Allocate COUNT tokens for RUN. */
869 _cpp_init_tokenrun (run, count)
873 run->base = xnewvec (cpp_token, count);
874 run->limit = run->base + count;
878 /* Returns the next tokenrun, or creates one if there is none. */
883 if (run->next == NULL)
885 run->next = xnew (tokenrun);
886 run->next->prev = run;
887 _cpp_init_tokenrun (run->next, 250);
893 /* Allocate a single token that is invalidated at the same time as the
894 rest of the tokens on the line. Has its line and col set to the
895 same as the last lexed token, so that diagnostics appear in the
898 _cpp_temp_token (pfile)
901 cpp_token *old, *result;
903 old = pfile->cur_token - 1;
904 if (pfile->cur_token == pfile->cur_run->limit)
906 pfile->cur_run = next_tokenrun (pfile->cur_run);
907 pfile->cur_token = pfile->cur_run->base;
910 result = pfile->cur_token++;
911 result->line = old->line;
912 result->col = old->col;
916 /* Lex a token into RESULT (external interface). Takes care of issues
917 like directive handling, token lookahead, multiple include
918 optimization and skipping. */
920 _cpp_lex_token (pfile)
927 if (pfile->cur_token == pfile->cur_run->limit)
929 pfile->cur_run = next_tokenrun (pfile->cur_run);
930 pfile->cur_token = pfile->cur_run->base;
933 if (pfile->lookaheads)
936 result = pfile->cur_token++;
939 result = _cpp_lex_direct (pfile);
941 if (result->flags & BOL)
943 /* Is this a directive. If _cpp_handle_directive returns
944 false, it is an assembler #. */
945 if (result->type == CPP_HASH
946 && !pfile->state.parsing_args
947 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
949 if (pfile->cb.line_change && !pfile->state.skipping)
950 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
953 /* We don't skip tokens in directives. */
954 if (pfile->state.in_directive)
957 /* Outside a directive, invalidate controlling macros. At file
958 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
959 get here and MI optimisation works. */
960 pfile->mi_valid = false;
962 if (!pfile->state.skipping || result->type == CPP_EOF)
969 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
971 if (get_effective_char (pfile) == CHAR) \
972 result->type = THEN_TYPE; \
976 result->type = ELSE_TYPE; \
980 /* Lex a token into pfile->cur_token, which is also incremented, to
981 get diagnostics pointing to the correct location.
983 Does not handle issues such as token lookahead, multiple-include
984 optimisation, directives, skipping etc. This function is only
985 suitable for use by _cpp_lex_token, and in special cases like
986 lex_expansion_token which doesn't care for any of these issues.
988 When meeting a newline, returns CPP_EOF if parsing a directive,
989 otherwise returns to the start of the token buffer if permissible.
990 Returns the location of the lexed token. */
992 _cpp_lex_direct (pfile)
997 const unsigned char *comment_start;
998 cpp_token *result = pfile->cur_token++;
1001 buffer = pfile->buffer;
1002 result->flags = buffer->saved_flags;
1003 buffer->saved_flags = 0;
1005 result->line = pfile->line;
1009 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1014 case ' ': case '\t': case '\f': case '\v': case '\0':
1015 result->flags |= PREV_WHITE;
1016 if (skip_whitespace (pfile, c))
1021 buffer->saved_flags = BOL;
1022 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1024 if (buffer->cur != buffer->line_base)
1026 /* Non-empty files should end in a newline. Don't warn
1027 for command line and _Pragma buffers. */
1028 if (!buffer->from_stage3)
1029 cpp_pedwarn (pfile, "no newline at end of file");
1030 handle_newline (pfile);
1033 /* Don't pop the last buffer. */
1036 unsigned char stop = buffer->return_at_eof;
1038 _cpp_pop_buffer (pfile);
1043 result->type = CPP_EOF;
1046 case '\n': case '\r':
1047 handle_newline (pfile);
1048 buffer->saved_flags = BOL;
1049 if (! pfile->state.in_directive)
1051 if (pfile->state.parsing_args == 2)
1052 buffer->saved_flags |= PREV_WHITE;
1053 if (!pfile->keep_tokens)
1055 pfile->cur_run = &pfile->base_run;
1056 result = pfile->base_run.base;
1057 pfile->cur_token = result + 1;
1061 result->type = CPP_EOF;
1066 /* These could start an escaped newline, or '?' a trigraph. Let
1067 skip_escaped_newlines do all the work. */
1069 unsigned int line = pfile->line;
1071 c = skip_escaped_newlines (pfile);
1072 if (line != pfile->line)
1075 /* We had at least one escaped newline of some sort.
1076 Update the token's line and column. */
1077 goto update_tokens_line;
1081 /* We are either the original '?' or '\\', or a trigraph. */
1083 result->type = CPP_QUERY;
1090 case '0': case '1': case '2': case '3': case '4':
1091 case '5': case '6': case '7': case '8': case '9':
1092 result->type = CPP_NUMBER;
1093 parse_number (pfile, &result->val.str, c, 0);
1097 /* 'L' may introduce wide characters or strings. */
1099 const unsigned char *pos = buffer->cur;
1101 c = get_effective_char (pfile);
1102 if (c == '\'' || c == '"')
1104 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1105 parse_string (pfile, result, c);
1114 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1115 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1116 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1117 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1119 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1120 case 'G': case 'H': case 'I': case 'J': case 'K':
1121 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1122 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1124 result->type = CPP_NAME;
1125 result->val.node = parse_identifier (pfile);
1127 /* SDCC _asm specific */
1128 /* handle _asm ... _endasm ; */
1129 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1131 comment_start = buffer->cur;
1132 result->type = CPP_ASM;
1133 skip_asm_block (pfile);
1134 /* Save the _asm block as a token in its own right. */
1135 save_asm (pfile, result, comment_start);
1137 /* Convert named operators to their proper types. */
1138 else if (result->val.node->flags & NODE_OPERATOR)
1140 result->flags |= NAMED_OP;
1141 result->type = result->val.node->value.operator;
1147 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1148 parse_string (pfile, result, c);
1152 /* A potential block or line comment. */
1153 comment_start = buffer->cur;
1154 c = get_effective_char (pfile);
1158 if (skip_block_comment (pfile))
1159 cpp_error (pfile, "unterminated comment");
1161 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1162 || CPP_IN_SYSTEM_HEADER (pfile)))
1164 /* Warn about comments only if pedantically GNUC89, and not
1165 in system headers. */
1166 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1167 && ! buffer->warned_cplusplus_comments)
1170 "C++ style comments are not allowed in ISO C89");
1172 "(this will be reported only once per input file)");
1173 buffer->warned_cplusplus_comments = 1;
1176 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1177 cpp_warning (pfile, "multi-line comment");
1181 result->type = CPP_DIV_EQ;
1187 result->type = CPP_DIV;
1191 if (!pfile->state.save_comments)
1193 result->flags |= PREV_WHITE;
1194 goto update_tokens_line;
1197 /* Save the comment as a token in its own right. */
1198 save_comment (pfile, result, comment_start);
1202 if (pfile->state.angled_headers)
1204 result->type = CPP_HEADER_NAME;
1205 parse_string (pfile, result, '>');
1209 c = get_effective_char (pfile);
1211 result->type = CPP_LESS_EQ;
1213 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1214 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1215 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1216 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1218 result->type = CPP_OPEN_SQUARE;
1219 result->flags |= DIGRAPH;
1221 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1223 result->type = CPP_OPEN_BRACE;
1224 result->flags |= DIGRAPH;
1229 result->type = CPP_LESS;
1234 c = get_effective_char (pfile);
1236 result->type = CPP_GREATER_EQ;
1238 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1239 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1240 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1244 result->type = CPP_GREATER;
1249 c = get_effective_char (pfile);
1251 result->type = CPP_MOD_EQ;
1252 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1254 result->flags |= DIGRAPH;
1255 result->type = CPP_HASH;
1256 if (get_effective_char (pfile) == '%')
1258 const unsigned char *pos = buffer->cur;
1260 if (get_effective_char (pfile) == ':')
1261 result->type = CPP_PASTE;
1263 buffer->cur = pos - 1;
1268 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1270 result->flags |= DIGRAPH;
1271 result->type = CPP_CLOSE_BRACE;
1276 result->type = CPP_MOD;
1281 result->type = CPP_DOT;
1282 c = get_effective_char (pfile);
1285 const unsigned char *pos = buffer->cur;
1287 if (get_effective_char (pfile) == '.')
1288 result->type = CPP_ELLIPSIS;
1290 buffer->cur = pos - 1;
1292 /* All known character sets have 0...9 contiguous. */
1293 else if (ISDIGIT (c))
1295 result->type = CPP_NUMBER;
1296 parse_number (pfile, &result->val.str, c, 1);
1298 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1299 result->type = CPP_DOT_STAR;
1305 c = get_effective_char (pfile);
1307 result->type = CPP_PLUS_PLUS;
1309 result->type = CPP_PLUS_EQ;
1313 result->type = CPP_PLUS;
1318 c = get_effective_char (pfile);
1321 result->type = CPP_DEREF;
1322 if (CPP_OPTION (pfile, cplusplus))
1324 if (get_effective_char (pfile) == '*')
1325 result->type = CPP_DEREF_STAR;
1331 result->type = CPP_MINUS_MINUS;
1333 result->type = CPP_MINUS_EQ;
1337 result->type = CPP_MINUS;
1342 c = get_effective_char (pfile);
1344 result->type = CPP_AND_AND;
1346 result->type = CPP_AND_EQ;
1350 result->type = CPP_AND;
1355 c = get_effective_char (pfile);
1357 result->type = CPP_OR_OR;
1359 result->type = CPP_OR_EQ;
1363 result->type = CPP_OR;
1368 c = get_effective_char (pfile);
1369 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1370 result->type = CPP_SCOPE;
1371 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1373 result->flags |= DIGRAPH;
1374 result->type = CPP_CLOSE_SQUARE;
1379 result->type = CPP_COLON;
1383 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1384 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1385 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1386 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1387 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1389 case '~': result->type = CPP_COMPL; break;
1390 case ',': result->type = CPP_COMMA; break;
1391 case '(': result->type = CPP_OPEN_PAREN; break;
1392 case ')': result->type = CPP_CLOSE_PAREN; break;
1393 case '[': result->type = CPP_OPEN_SQUARE; break;
1394 case ']': result->type = CPP_CLOSE_SQUARE; break;
1395 case '{': result->type = CPP_OPEN_BRACE; break;
1396 case '}': result->type = CPP_CLOSE_BRACE; break;
1397 case ';': result->type = CPP_SEMICOLON; break;
1399 /* @ is a punctuator in Objective C. */
1400 case '@': result->type = CPP_ATSIGN; break;
1403 if (CPP_OPTION (pfile, dollars_in_ident))
1405 /* Fall through... */
1409 result->type = CPP_OTHER;
1417 /* Return an upper bound on the number of bytes needed to spell TOKEN,
1418 including preceding whitespace. The bound starts from the length of
any text stored with the token (string or identifier spelling); the
comment at the end of the function lists the fixed allowance that
accompanies it. */
1420 cpp_token_len (token)
1421 const cpp_token *token;
/* Pick the variable part of the length from the spelling category. */
1425 switch (TOKEN_SPELL (token))
/* Any category other than the two below has no stored text to count. */
1427 default: len = 0; break;
/* String-spelt tokens record their length in val.str.len. */
1429 case SPELL_STRING: len = token->val.str.len; break;
/* Identifier-spelt tokens use the length of the node's name. */
1430 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1432 /* 1 for whitespace, 4 for comment delimiters. */
1436 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1437 already contain enough space to hold the token's spelling.
1438 Returns a pointer to the character after the last character
1441 cpp_spell_token (pfile, token, buffer)
1442 cpp_reader *pfile; /* Would be nice to be rid of this... */
1443 const cpp_token *token;
1444 unsigned char *buffer;
/* Dispatch on the spelling category; the visible stores advance
   BUFFER as they go. */
1446 switch (TOKEN_SPELL (token))
1448 case SPELL_OPERATOR:
1450 const unsigned char *spelling;
/* Digraph forms come from digraph_spellings, indexed by the token
   type's offset from CPP_FIRST_DIGRAPH. */
1453 if (token->flags & DIGRAPH)
1455 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1456 else if (token->flags & NAMED_OP)
/* The default spelling is the table name for this token type. */
1459 spelling = TOKEN_NAME (token);
/* Walk the NUL-terminated spelling one character at a time. */
1461 while ((c = *spelling++) != '\0')
/* The token's payload is a single character. */
1467 *buffer++ = token->val.c;
/* Copy the name held by the token's node, then step past it. */
1472 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1473 buffer += NODE_LEN (token->val.node);
/* Copy the stored text verbatim, then step past it. */
1477 memcpy (buffer, token->val.str.text, token->val.str.len);
1478 buffer += token->val.str.len;
/* Quoted tokens: the token type selects the opening and closing
   delimiters and an optional one-character prefix (TAG). */
1483 int left, right, tag;
1484 switch (token->type)
1486 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1487 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1488 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1489 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1490 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* Any other token type in this category is reported through cpp_ice. */
1492 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
/* A zero TAG means no prefix is written. */
1495 if (tag) *buffer++ = tag;
/* The text between the delimiters is copied unchanged. */
1497 memcpy (buffer, token->val.str.text, token->val.str.len);
1498 buffer += token->val.str.len;
/* A spelling category with no handler is also reported through cpp_ice. */
1504 cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
1511 /* Returns TOKEN spelt as a null-terminated string. The string is
1512 freed when the reader is destroyed. Useful for diagnostics. */
1514 cpp_token_as_text (pfile, token)
1516 const cpp_token *token;
/* Size the buffer from the upper bound computed by cpp_token_len. */
1518 unsigned int len = cpp_token_len (token);
/* The storage comes from the reader's unaligned allocator. */
1519 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* END receives the pointer returned by cpp_spell_token after it has
   written the spelling starting at START. */
1521 end = cpp_spell_token (pfile, token, start);
1527 /* Returns the token_spellings name recorded for token type TYPE.
Used by C front ends, which really should move to using
1528 cpp_token_as_text. */
1530 cpp_type2name (type)
1531 enum cpp_ttype type;
/* TYPE indexes token_spellings directly; the name field is handed
   back cast to a plain char pointer. */
1533 return (const char *) token_spellings[type].name;
1536 /* Writes the spelling of token to FP, without any preceding space.
1537 Separated from cpp_spell_token for efficiency - to avoid stdio
1538 double-buffering. */
1540 cpp_output_token (token, fp)
1541 const cpp_token *token;
/* Dispatch on the spelling category, writing straight to FP. */
1544 switch (TOKEN_SPELL (token))
1546 case SPELL_OPERATOR:
1548 const unsigned char *spelling;
/* Digraph forms come from digraph_spellings, indexed by the token
   type's offset from CPP_FIRST_DIGRAPH. */
1551 if (token->flags & DIGRAPH)
1553 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1554 else if (token->flags & NAMED_OP)
/* The default spelling is the table name for this token type. */
1557 spelling = TOKEN_NAME (token);
/* NOTE(review): the pre-increment and trailing semicolon suggest this
   line closes a do-while loop whose head is not shown here - confirm. */
1562 while ((c = *++spelling) != '\0');
/* The token's payload is a single character. */
1567 putc (token->val.c, fp);
/* Write the name held by the token's node. */
1572 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Write the stored text verbatim. */
1576 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* Quoted tokens: the token type selects the opening and closing
   delimiters and an optional one-character prefix (TAG). */
1581 int left, right, tag;
1582 switch (token->type)
1584 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1585 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1586 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1587 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1588 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* Any other token type in this category is reported on stderr rather
   than on FP. */
1590 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
/* A zero TAG means no prefix is written. */
1593 if (tag) putc (tag, fp);
/* The text between the delimiters is written unchanged. */
1595 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1601 /* An error, most probably. */
1606 /* Compare two tokens. Only tokens whose type and flags both match
go on to have their payloads compared, in a way chosen by the
spelling category of A. */
1608 _cpp_equiv_tokens (a, b)
1609 const cpp_token *a, *b;
/* Type and flags must agree before the payload is looked at. */
1611 if (a->type == b->type && a->flags == b->flags)
1612 switch (TOKEN_SPELL (a))
1614 default: /* Keep compiler happy. */
1615 case SPELL_OPERATOR:
1618 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must additionally agree on the argument number;
   for every other type reaching this return the match above suffices. */
1620 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Node payloads are compared by pointer identity. */
1622 return a->val.node == b->val.node;
/* String payloads must have equal length and identical bytes. */
1625 return (a->val.str.len == b->val.str.len
1626 && !memcmp (a->val.str.text, b->val.str.text,
1633 /* Returns nonzero if a space should be inserted to avoid an
1634 accidental token paste for output. For simplicity, it is
1635 conservative, and occasionally advises a space where one is not
1636 needed, e.g. "." and ".2". */
1638 cpp_avoid_paste (pfile, token1, token2)
1640 const cpp_token *token1, *token2;
/* A and B are the token types of the left and right tokens. */
1642 enum cpp_ttype a = token1->type, b = token2->type;
/* Tokens flagged NAMED_OP get separate handling on each side. */
1645 if (token1->flags & NAMED_OP)
1647 if (token2->flags & NAMED_OP)
/* C is the first character of TOKEN2's spelling when TOKEN2 is an
   operator: taken from the digraph table if it is flagged DIGRAPH,
   otherwise from the token_spellings name. */
1651 if (token2->flags & DIGRAPH)
1652 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1653 else if (token_spellings[b].category == SPELL_OPERATOR)
1654 c = token_spellings[b].name[0];
1656 /* Quickly get everything that can paste with an '='. */
1657 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below names the leading characters (or token types) of
   TOKEN2 for which a separating space is advised after TOKEN1. */
1662 case CPP_GREATER: return c == '>' || c == '?';
1663 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1664 case CPP_PLUS: return c == '+';
1665 case CPP_MINUS: return c == '-' || c == '>';
1666 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1667 case CPP_MOD: return c == ':' || c == '>';
1668 case CPP_AND: return c == '&';
1669 case CPP_OR: return c == '|';
1670 case CPP_COLON: return c == ':' || c == '>';
1671 case CPP_DEREF: return c == '*';
1672 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1673 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* A number after a name only matters when name_p accepts the
   number's text. */
1674 case CPP_NAME: return ((b == CPP_NUMBER
1675 && name_p (pfile, &token2->val.str))
1677 || b == CPP_CHAR || b == CPP_STRING); /* L */
1678 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1679 || c == '.' || c == '+' || c == '-');
/* Only the '@' character matters here, and only when the objc option
   is enabled. */
1680 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1681 && token1->val.c == '@'
1682 && (b == CPP_NAME || b == CPP_STRING));
1689 /* Output all the remaining tokens on the current line, and a newline
1690 character, to FP. Leading whitespace is removed. If there are
1691 macros, special token padding is not performed. */
1693 cpp_output_line (pfile, fp)
1697 const cpp_token *token;
/* Prime the loop with the first token; CPP_EOF ends it. */
1699 token = cpp_get_token (pfile);
1700 while (token->type != CPP_EOF)
1702 cpp_output_token (token, fp);
/* The next token is fetched before spacing is decided, so the
   PREV_WHITE test below looks at the token that follows the one just
   written. */
1703 token = cpp_get_token (pfile);
1704 if (token->flags & PREV_WHITE)
1711 /* Returns the value of a hexadecimal digit. */
/* The conversion itself is delegated to hex_value. */
1717 return hex_value (c);
1722 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1723 failure if cpplib is not parsing C++ or C99. Such failure is
1724 silent, and no variables are updated. Otherwise returns 0, and
1725 warns if -Wtraditional.
1727 [lex.charset]: The character designated by the universal character
1728 name \UNNNNNNNN is that character whose character short name in
1729 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1730 universal character name \uNNNN is that character whose character
1731 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1732 for a universal character name is less than 0x20 or in the range
1733 0x7F-0x9F (inclusive), or if the universal character name
1734 designates a character in the basic source character set, then the
1735 program is ill-formed.
1737 We assume that wchar_t is Unicode, so we don't need to do any
1738 mapping. Is this ever wrong?
1740 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1741 LIMIT is the end of the string or charconst. PSTR is updated to
1742 point after the UCS on return, and the UCS is written into PC. */
1745 maybe_read_ucs (pfile, pstr, limit, pc)
1747 const unsigned char **pstr;
1748 const unsigned char *limit;
/* P scans the digits, CODE accumulates their value, and C starts out
   as the introducing letter read through PC. */
1751 const unsigned char *p = *pstr;
1752 unsigned int code = 0;
1753 unsigned int c = *pc, length;
1755 /* Only attempt to interpret a UCS for C++ and C99. */
1756 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1759 if (CPP_WTRADITIONAL (pfile))
1760 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* A lower-case u introduces four hex digits; otherwise eight are
   expected. */
1762 length = (c == 'u' ? 4: 8);
/* Fewer bytes remain than the form requires. */
1764 if ((size_t) (limit - p) < length)
1766 cpp_error (pfile, "incomplete universal-character-name");
1767 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts the digits still expected; each one shifts four more
   bits into CODE. */
1772 for (; length; length--, p++)
1776 code = (code << 4) + hex_digit_value (c);
1780 "non-hex digit '%c' in universal-character-name", c);
1781 /* We shouldn't skip in case there are multibyte chars. */
1787 #ifdef TARGET_EBCDIC
1788 cpp_error (pfile, "universal-character-name on EBCDIC target");
1789 code = 0x3f; /* EBCDIC invalid character */
1791 /* True extended characters are OK. */
/* The two visible conditions exclude values with bit 31 set and the
   range 0xD800-0xDFFF. */
1793 && !(code & 0x80000000)
1794 && !(code >= 0xD800 && code <= 0xDFFF))
1796 /* The standard permits $, @ and ` to be specified as UCNs. We use
1797 hex escapes so that this also works with EBCDIC hosts. */
1798 else if (code == 0x24 || code == 0x40 || code == 0x60)
1800 /* Don't give another error if one occurred above. */
/* NOTE(review): LENGTH is presumably nonzero here only when the digit
   loop stopped early - confirm against the loop body. */
1801 else if (length == 0)
1802 cpp_error (pfile, "universal-character-name out of range");
1810 /* Interpret an escape sequence, and return its value. PSTR points to
1811 the input pointer, which is just after the backslash. LIMIT is how
1812 much text we have. MASK is a bitmask for the precision for the
1813 destination type (char or wchar_t). TRADITIONAL, if true, does not
1814 interpret escapes that did not exist in traditional C.
1816 Handles all relevant diagnostics. */
1818 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1820 const unsigned char **pstr;
1821 const unsigned char *limit;
1822 unsigned HOST_WIDE_INT mask;
/* C is the character right after the backslash; STR then points at
   whatever follows it. */
1826 const unsigned char *str = *pstr;
1827 unsigned int c = *str++;
/* These characters stand for themselves, so C is left unchanged. */
1831 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the TARGET_* character values. */
1832 case 'b': c = TARGET_BS; break;
1833 case 'f': c = TARGET_FF; break;
1834 case 'n': c = TARGET_NEWLINE; break;
1835 case 'r': c = TARGET_CR; break;
1836 case 't': c = TARGET_TAB; break;
1837 case 'v': c = TARGET_VT; break;
1839 case '(': case '{': case '[': case '%':
1840 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1841 '\%' is used to prevent SCCS from getting confused. */
/* These count as unknown escapes only when pedantic. */
1842 unknown = CPP_PEDANTIC (pfile);
1846 if (CPP_WTRADITIONAL (pfile))
1847 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1853 if (CPP_PEDANTIC (pfile))
1854 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are handed to maybe_read_ucs, whose
   result says whether the escape is to be treated as unknown. */
1859 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1863 if (CPP_WTRADITIONAL (pfile))
1864 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value, OVERFLOW collects any bits
   lost on the way. */
1868 unsigned int i = 0, overflow = 0;
1869 int digits_found = 0;
/* Nonzero exactly when the top four bits of I are set, i.e. when the
   shift on the next line would discard them. */
1877 overflow |= i ^ (i << 4 >> 4);
1878 i = (i << 4) + hex_digit_value (c);
1883 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit MASK. */
1885 if (overflow | (i != (i & mask)))
1887 cpp_pedwarn (pfile, "hex escape sequence out of range");
1894 case '0': case '1': case '2': case '3':
1895 case '4': case '5': case '6': case '7':
/* Octal escape: the first digit has already been read into C. */
1897 unsigned int i = c - '0';
/* NOTE(review): COUNT is declared outside the lines shown; if it
   starts at zero this admits at most two further digits - confirm. */
1900 while (str < limit && ++count < 3)
1903 if (c < '0' || c > '7')
1906 i = (i << 3) + c - '0';
/* Out of range if the value does not fit MASK. */
1909 if (i != (i & mask))
1911 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two wordings for the unknown-escape diagnostic: one shows the
   character itself, the other its octal code. */
1926 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1928 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1932 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* When no maximum is supplied, the maximum character width falls back
   to CHAR_TYPE_SIZE. */
1938 #ifndef MAX_CHAR_TYPE_SIZE
1939 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
/* Likewise, the maximum wide-character width falls back to
   WCHAR_TYPE_SIZE. */
1942 #ifndef MAX_WCHAR_TYPE_SIZE
1943 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1946 /* Interpret a (possibly wide) character constant in TOKEN.
1947 WARN_MULTI warns about multi-character charconsts, if not
1948 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1949 that did not exist in traditional C. PCHARS_SEEN points to a
1950 variable that is filled in with the number of characters seen. */
1952 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1954 const cpp_token *token;
1957 unsigned int *pchars_seen;
/* STR and LIMIT bracket the constant's text as stored in the token. */
1959 const unsigned char *str = token->val.str.text;
1960 const unsigned char *limit = str + token->val.str.len;
1961 unsigned int chars_seen = 0;
1962 unsigned int width, max_chars, c;
1963 unsigned HOST_WIDE_INT mask;
1964 HOST_WIDE_INT result = 0;
1967 #ifdef MULTIBYTE_CHARS
1968 (void) local_mbtowc (NULL, NULL, 0);
1971 /* Width in bits. */
/* Narrow constants use the char width and the signed_char option;
   anything else uses the wide-character width and WCHAR_UNSIGNED. */
1972 if (token->type == CPP_CHAR)
1974 width = MAX_CHAR_TYPE_SIZE;
1975 unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1979 width = MAX_WCHAR_TYPE_SIZE;
1980 unsigned_p = WCHAR_UNSIGNED;
/* MASK keeps the low WIDTH bits; this form is only used when WIDTH is
   narrower than HOST_WIDE_INT, so the shift stays in range. */
1983 if (width < HOST_BITS_PER_WIDE_INT)
1984 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of WIDTH-bit characters that fit in a HOST_WIDE_INT. */
1987 max_chars = HOST_BITS_PER_WIDE_INT / width;
1991 #ifdef MULTIBYTE_CHARS
1995 char_len = local_mbtowc (&wc, str, limit - str);
1998 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which also
   advances STR. */
2011 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2013 #ifdef MAP_CHARACTER
2015 c = MAP_CHARACTER (c);
2018 /* Merge character into result; ignore excess chars. */
2019 if (++chars_seen <= max_chars)
2021 if (width < HOST_BITS_PER_WIDE_INT)
2022 result = (result << width) | (c & mask);
/* Diagnose empty, over-long and multi-character constants; an
   over-long one has its count clamped to MAX_CHARS. */
2028 if (chars_seen == 0)
2029 cpp_error (pfile, "empty character constant");
2030 else if (chars_seen > max_chars)
2032 chars_seen = max_chars;
2033 cpp_warning (pfile, "character constant too long");
2035 else if (chars_seen > 1 && !traditional && warn_multi)
2036 cpp_warning (pfile, "multi-character character constant");
2038 /* If relevant type is signed, sign-extend the constant. */
/* NBITS is the number of bits occupied by the characters kept. */
2041 unsigned int nbits = chars_seen * width;
/* MASK is rebuilt to cover exactly the low NBITS bits. */
2043 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
/* The condition holds when the type is unsigned or the top occupied
   bit is clear. */
2044 if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
/* Report the (possibly clamped) character count to the caller. */
2050 *pchars_seen = chars_seen;
2054 /* Memory buffers. Changing these three constants can have a dramatic
2055 effect on performance. The values here are reasonable defaults,
2056 but might be tuned. If you adjust them, be sure to test across a
2057 range of uses of cpplib, including heavy nested function-like macro
2058 expansion. Also check the change in peak memory usage (NJAMD is a
2059 good tool for this). */
/* Smallest length given to a newly created buffer. */
2060 #define MIN_BUFF_SIZE 8000
/* Largest free buffer accepted for a request of MIN_SIZE bytes:
   MIN_BUFF_SIZE plus one and a half times the request. */
2061 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Length to request when growing BUFF: MIN_EXTRA bytes plus twice the
   room still unused in BUFF (limit - cur).  Both arguments are
   parenthesized so that an expression argument such as a conditional
   or a shift keeps its own precedence inside the expansion.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time check that even a zero-byte request admits a buffer of
   MIN_BUFF_SIZE. */
2065 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2066 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
/* Alignment applied to buffer lengths: the offset of member U within
   struct dummy.  NOTE(review): struct dummy is declared outside the
   lines shown here - confirm its layout. */
2079 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   correct when ALIGN is a power of two. */
2080 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2082 /* Create a new allocation buffer. Place the control block at the end
2083 of the buffer, so that buffer overflows will cause immediate chaos. */
2089 unsigned char *base;
/* Enforce the minimum length, then round up to DEFAULT_ALIGNMENT. */
2091 if (len < MIN_BUFF_SIZE)
2092 len = MIN_BUFF_SIZE;
2093 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* A single allocation holds LEN data bytes followed by the _cpp_buff
   control block. */
2095 base = xmalloc (len + sizeof (_cpp_buff));
2096 result = (_cpp_buff *) (base + len);
2097 result->base = base;
/* The usable region stops where the control block begins. */
2099 result->limit = base + len;
2100 result->next = NULL;
2104 /* Place a chain of unwanted allocation buffers on the free list. */
2106 _cpp_release_buff (pfile, buff)
/* END starts at the head of the chain.  NOTE(review): presumably it
   is advanced to the last buffer by a loop not shown here - confirm. */
2110 _cpp_buff *end = buff;
/* Hook the existing free list after END and make BUFF the new head. */
2114 end->next = pfile->free_buffs;
2115 pfile->free_buffs = buff;
2118 /* Return a free buffer of size at least MIN_SIZE. */
2120 _cpp_get_buff (pfile, min_size)
2124 _cpp_buff *result, **p;
/* P walks the links of the free list, starting at pfile->free_buffs. */
2126 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* A fresh buffer comes from new_buff when none is reused. */
2131 return new_buff (min_size);
/* Capacity of the candidate, measured from base to limit. */
2133 size = result->limit - result->base;
2134 /* Return a buffer that's big enough, but don't waste one that's
2136 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2141 result->next = NULL;
2142 result->cur = result->base;
2146 /* Creates a new buffer with enough space to hold the uncommitted
2147 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2148 the excess bytes to the new buffer. Chains the new buffer after
2149 BUFF, and returns the new buffer. */
2151 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The request is sized by EXTENDED_BUFF_SIZE and satisfied through
   _cpp_get_buff. */
2156 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2157 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer behind BUFF, then copy BUFF_ROOM (buff) bytes
   from BUFF's cur to the new buffer's base.  NOTE(review): BUFF_ROOM
   is defined outside the lines shown here - confirm what it measures. */
2159 buff->next = new_buff;
2160 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2164 /* Creates a new buffer with enough space to hold the uncommitted
2165 remaining bytes of the buffer pointed to by BUFF, and at least
2166 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2167 Chains the new buffer before the buffer pointed to by BUFF, and
2168 updates the pointer to point to the new buffer. */
2170 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer the caller's pointer refers to on entry. */
2175 _cpp_buff *new_buff, *old_buff = *pbuff;
2176 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* The request is satisfied through _cpp_get_buff. */
2178 new_buff = _cpp_get_buff (pfile, size);
/* Copy BUFF_ROOM (old_buff) bytes from the old buffer's cur to the new
   buffer's base, then link the old buffer behind the new one.
   NOTE(review): BUFF_ROOM is defined outside the lines shown here -
   confirm what it measures. */
2179 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2180 new_buff->next = old_buff;
2184 /* Free a chain of buffers starting at BUFF. */
2186 _cpp_free_buff (buff)
/* The loop steps through the saved NEXT rather than buff->next and
   stops at a null link. */
2191 for (; buff; buff = next)
2198 /* Allocate permanent, unaligned storage of length LEN. The bytes are
carved from the buffer at the head of pfile->u_buff. */
2200 _cpp_unaligned_alloc (pfile, len)
/* RESULT is the first unused byte of the current buffer. */
2204 _cpp_buff *buff = pfile->u_buff;
2205 unsigned char *result = buff->cur;
/* Not enough room left between RESULT and the buffer's limit. */
2207 if (len > (size_t) (buff->limit - result))
/* Get a buffer of at least LEN bytes and make it the new head of the
   u_buff chain, with the previous head linked behind it. */
2209 buff = _cpp_get_buff (pfile, len);
2210 buff->next = pfile->u_buff;
2211 pfile->u_buff = buff;
/* Mark LEN bytes starting at RESULT as used. */
2215 buff->cur = result + len;
2219 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2220 That buffer is used for growing allocations when saving macro
2221 replacement lists in a #define, and when parsing an answer to an
2222 assertion in #assert, #unassert or #if (and therefore possibly
2223 whilst expanding macros). It therefore must not be used by any
2224 code that they might call: specifically the lexer and the guts of
2227 All existing other uses clearly fit this restriction: storing
2228 registered pragmas during initialization. */
/* NOTE(review): the summary above says "unaligned" although the
   function is named _cpp_aligned_alloc, and the lines shown here do
   not round LEN - confirm which is intended. */
2230 _cpp_aligned_alloc (pfile, len)
/* RESULT is the first unused byte of the current buffer. */
2234 _cpp_buff *buff = pfile->a_buff;
2235 unsigned char *result = buff->cur;
/* Not enough room left between RESULT and the buffer's limit. */
2237 if (len > (size_t) (buff->limit - result))
/* Get a buffer of at least LEN bytes and make it the new head of the
   a_buff chain, with the previous head linked behind it. */
2239 buff = _cpp_get_buff (pfile, len);
2240 buff->next = pfile->a_buff;
2241 pfile->a_buff = buff;
/* Mark LEN bytes starting at RESULT as used. */
2245 buff->cur = result + len;