1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #ifdef MULTIBYTE_CHARS
34 /* Tokens with SPELL_STRING store their spelling in the token list,
35 and its length in the token->val.name.len. */
48 enum spell_type category;
49 const unsigned char *name;
52 static const unsigned char *const digraph_spellings[] =
53 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
55 #define OP(e, s) { SPELL_OPERATOR, U s },
56 #define TK(e, s) { s, U STRINGX (e) },
57 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
61 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
62 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
63 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
65 static void handle_newline PARAMS ((cpp_reader *));
66 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
67 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
69 static int skip_asm_block PARAMS ((cpp_reader *));
70 static int skip_block_comment PARAMS ((cpp_reader *));
71 static int skip_line_comment PARAMS ((cpp_reader *));
72 static void adjust_column PARAMS ((cpp_reader *));
73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
80 static bool trigraph_p PARAMS ((cpp_reader *));
81 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
82 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
83 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
85 static bool continue_after_nul PARAMS ((cpp_reader *));
86 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
87 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
88 const unsigned char *, cppchar_t *));
89 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
91 static unsigned int hex_digit_value PARAMS ((unsigned int));
92 static _cpp_buff *new_buff PARAMS ((size_t));
96 Compares the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
99 cpp_ideq (token, string)
100 const cpp_token *token;
103 if (token->type != CPP_NAME)
106 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
109 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
110 Returns with buffer->cur pointing to the character immediately
111 following the newline (combination). */
113 handle_newline (pfile)
116 cpp_buffer *buffer = pfile->buffer;
118 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
119 only accept CR-LF; maybe we should fall back to that behavior? */
120 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
123 buffer->line_base = buffer->cur;
124 buffer->col_adjust = 0;
128 /* Subroutine of skip_escaped_newlines; called when a 3-character
129 sequence beginning with "??" is encountered. buffer->cur points to
132 Warns if necessary, and returns true if the sequence forms a
133 trigraph and the trigraph should be honored. */
138 cpp_buffer *buffer = pfile->buffer;
139 cppchar_t from_char = buffer->cur[1];
142 if (!_cpp_trigraph_map[from_char])
145 accept = CPP_OPTION (pfile, trigraphs);
147 /* Don't warn about trigraphs in comments. */
148 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
151 cpp_error_with_line (pfile, DL_WARNING,
152 pfile->line, CPP_BUF_COL (buffer) - 1,
153 "trigraph ??%c converted to %c",
155 (int) _cpp_trigraph_map[from_char]);
156 else if (buffer->cur != buffer->last_Wtrigraphs)
158 buffer->last_Wtrigraphs = buffer->cur;
159 cpp_error_with_line (pfile, DL_WARNING,
160 pfile->line, CPP_BUF_COL (buffer) - 1,
161 "trigraph ??%c ignored", (int) from_char);
168 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
169 lie in buffer->cur[-1]. Returns the next byte, which will be in
170 buffer->cur[-1]. This routine performs preprocessing stages 1 and
171 2 of the ISO C standard. */
173 skip_escaped_newlines (pfile)
176 cpp_buffer *buffer = pfile->buffer;
177 cppchar_t next = buffer->cur[-1];
179 /* Only do this if we apply stages 1 and 2. */
180 if (!buffer->from_stage3)
182 const unsigned char *saved_cur;
189 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
192 /* Translate the trigraph. */
193 next = _cpp_trigraph_map[buffer->cur[1]];
199 if (buffer->cur == buffer->rlimit)
202 /* We have a backslash, and room for at least one more
203 character. Skip horizontal whitespace. */
204 saved_cur = buffer->cur;
206 next1 = *buffer->cur++;
207 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
209 if (!is_vspace (next1))
211 buffer->cur = saved_cur;
215 if (saved_cur != buffer->cur - 1
216 && !pfile->state.lexing_comment)
217 cpp_error (pfile, DL_WARNING,
218 "backslash and newline separated by space");
220 handle_newline (pfile);
221 buffer->backup_to = buffer->cur;
222 if (buffer->cur == buffer->rlimit)
224 cpp_error (pfile, DL_PEDWARN,
225 "backslash-newline at end of file");
229 next = *buffer->cur++;
231 while (next == '\\' || next == '?');
237 /* Obtain the next character, after trigraph conversion and skipping
238 an arbitrarily long string of escaped newlines. The common case of
239 no trigraphs or escaped newlines falls through quickly. On return,
240 buffer->backup_to points to where to return to if the character is
241 not to be processed. */
243 get_effective_char (pfile)
247 cpp_buffer *buffer = pfile->buffer;
249 buffer->backup_to = buffer->cur;
250 next = *buffer->cur++;
251 if (__builtin_expect (next == '?' || next == '\\', 0))
252 next = skip_escaped_newlines (pfile);
257 /* SDCC _asm specific */
258 /* Skip an _asm ... _endasm block. We find the end of the block by
259 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
262 skip_asm_block (pfile)
265 #define _ENDASM_STR "endasm"
266 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
268 cpp_buffer *buffer = pfile->buffer;
273 pfile->state.lexing_comment = 1;
274 while (buffer->cur != buffer->rlimit)
276 prev_space = is_space(c);
279 /* FIXME: For speed, create a new character class of characters
280 of interest inside block comments. */
281 if (c == '?' || c == '\\')
282 c = skip_escaped_newlines (pfile);
284 if (prev_space && c == '_')
286 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
287 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
289 buffer->cur += _ENDASM_LEN;
294 else if (is_vspace (c))
296 prev_space = is_space(c);
297 handle_newline (pfile);
300 adjust_column (pfile);
303 pfile->state.lexing_comment = 0;
307 /* Skip a C-style block comment. We find the end of the comment by
308 seeing if an asterisk is before every '/' we encounter. Returns
309 nonzero if comment terminated by EOF, zero otherwise. */
311 skip_block_comment (pfile)
314 cpp_buffer *buffer = pfile->buffer;
315 cppchar_t c = EOF, prevc = EOF;
317 pfile->state.lexing_comment = 1;
318 while (buffer->cur != buffer->rlimit)
320 prevc = c, c = *buffer->cur++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c == '?' || c == '\\')
325 c = skip_escaped_newlines (pfile);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile, warn_comments)
338 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
339 cpp_error_with_line (pfile, DL_WARNING,
340 pfile->line, CPP_BUF_COL (buffer),
341 "\"/*\" within comment");
343 else if (is_vspace (c))
344 handle_newline (pfile);
346 adjust_column (pfile);
349 pfile->state.lexing_comment = 0;
350 return c != '/' || prevc != '*';
353 /* Skip a C++ line comment, leaving buffer->cur pointing to the
354 terminating newline. Handles escaped newlines. Returns nonzero
355 if a multiline comment. */
357 skip_line_comment (pfile)
360 cpp_buffer *buffer = pfile->buffer;
361 unsigned int orig_line = pfile->line;
363 #ifdef MULTIBYTE_CHARS
368 pfile->state.lexing_comment = 1;
369 #ifdef MULTIBYTE_CHARS
370 /* Reset multibyte conversion state. */
371 (void) local_mbtowc (NULL, NULL, 0);
375 if (buffer->cur == buffer->rlimit)
378 #ifdef MULTIBYTE_CHARS
379 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
380 buffer->rlimit - buffer->cur);
383 cpp_error (pfile, DL_WARNING,
384 "ignoring invalid multibyte character");
390 buffer->cur += char_len;
396 if (c == '?' || c == '\\')
397 c = skip_escaped_newlines (pfile);
399 while (!is_vspace (c));
401 /* Step back over the newline, except at EOF. */
405 pfile->state.lexing_comment = 0;
406 return orig_line != pfile->line;
409 /* pfile->buffer->cur is one beyond the \t character. Update
410 col_adjust so we track the column correctly. */
412 adjust_column (pfile)
415 cpp_buffer *buffer = pfile->buffer;
416 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
418 /* Round it up to multiple of the tabstop, but subtract 1 since the
419 tab itself occupies a character position. */
420 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
421 - col % CPP_OPTION (pfile, tabstop)) - 1;
424 /* Skips whitespace, saving the next non-whitespace character.
425 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
426 tokens might be assigned an incorrect column. */
428 skip_whitespace (pfile, c)
432 cpp_buffer *buffer = pfile->buffer;
433 unsigned int warned = 0;
437 /* Horizontal space always OK. */
441 adjust_column (pfile);
442 /* Just \f \v or \0 left. */
445 if (buffer->cur - 1 == buffer->rlimit)
449 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
453 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
454 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
455 CPP_BUF_COL (buffer),
456 "%s in preprocessing directive",
457 c == '\f' ? "form feed" : "vertical tab");
461 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
462 while (is_nvspace (c));
468 /* See if the characters of a number token are valid in a name (no
471 name_p (pfile, string)
473 const cpp_string *string;
477 for (i = 0; i < string->len; i++)
478 if (!is_idchar (string->text[i]))
484 /* Parse an identifier, skipping embedded backslash-newlines. This is
485 a critical inner loop. The common case is an identifier which has
486 not been split by backslash-newline, does not contain a dollar
487 sign, and has already been scanned (roughly 10:1 ratio of
488 seen:unseen identifiers in normal code; the distribution is
489 Poisson-like). Second most common case is a new identifier, not
490 split and no dollar sign. The other possibilities are rare and
491 have been relegated to parse_slow. */
492 static cpp_hashnode *
493 parse_identifier (pfile)
496 cpp_hashnode *result;
497 const uchar *cur, *base;
499 /* Fast-path loop. Skim over a normal identifier.
500 N.B. ISIDNUM does not include $. */
501 cur = pfile->buffer->cur;
502 while (ISIDNUM (*cur))
505 /* Check for slow-path cases. */
506 if (*cur == '?' || *cur == '\\' || *cur == '$')
510 base = parse_slow (pfile, cur, 0, &len);
511 result = (cpp_hashnode *)
512 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
516 base = pfile->buffer->cur - 1;
517 pfile->buffer->cur = cur;
518 result = (cpp_hashnode *)
519 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
522 /* Rarely, identifiers require diagnostics when lexed.
523 XXX Has to be forced out of the fast path. */
524 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
525 && !pfile->state.skipping, 0))
527 /* It is allowed to poison the same identifier twice. */
528 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
529 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
532 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
533 replacement list of a variadic macro. */
534 if (result == pfile->spec_nodes.n__VA_ARGS__
535 && !pfile->state.va_args_ok)
536 cpp_error (pfile, DL_PEDWARN,
537 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
543 /* Slow path. This handles numbers and identifiers which have been
544 split, or contain dollar signs. The part of the token from
545 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
546 1 if it's a number, and 2 if it has a leading period. Returns a
547 pointer to the token's NUL-terminated spelling in permanent
548 storage, and sets PLEN to its length. */
550 parse_slow (pfile, cur, number_p, plen)
556 cpp_buffer *buffer = pfile->buffer;
557 const uchar *base = buffer->cur - 1;
558 struct obstack *stack = &pfile->hash_table->stack;
559 unsigned int c, prevc, saw_dollar = 0;
561 /* Place any leading period. */
563 obstack_1grow (stack, '.');
565 /* Copy the part of the token which is known to be okay. */
566 obstack_grow (stack, base, cur - base);
568 /* Now process the part which isn't. We are looking at one of
569 '$', '\\', or '?' on entry to this loop. */
575 /* Potential escaped newline? */
576 buffer->backup_to = buffer->cur - 1;
577 if (c == '?' || c == '\\')
578 c = skip_escaped_newlines (pfile);
582 if (!ISXDIGIT (c) && c != '.' && !VALID_SIGN (c, prevc) && !VALID_HEX (c, prevc))
585 obstack_1grow (stack, c);
587 base = cur = buffer->cur;
588 while (ISXDIGIT (*cur))
592 obstack_grow (stack, base, cur - base);
603 /* Handle normal identifier characters in this loop. */
607 obstack_1grow (stack, c);
614 while (is_idchar (c));
618 /* Step back over the unwanted char. */
621 /* $ is not an identifier character in the standard, but is commonly
622 accepted as an extension. Don't warn about it in skipped
623 conditional blocks. */
624 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
625 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
627 /* Identifiers and numbers are null-terminated. */
628 *plen = obstack_object_size (stack);
629 obstack_1grow (stack, '\0');
630 return obstack_finish (stack);
633 /* Parse a number, beginning with character C, skipping embedded
634 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
635 before C. Place the result in NUMBER. */
637 parse_number (pfile, number, leading_period)
644 /* Fast-path loop. Skim over a normal number.
645 N.B. ISIDNUM does not include $. */
646 cur = pfile->buffer->cur;
648 while (ISXDIGIT (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]) || VALID_HEX (*cur, cur[-1]))
651 /* Check for slow-path cases. */
652 if (*cur == '?' || *cur == '\\' || *cur == '$')
653 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
656 const uchar *base = pfile->buffer->cur - 1;
659 number->len = cur - base + leading_period;
660 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
661 dest[number->len] = '\0';
666 memcpy (dest, base, cur - base);
667 pfile->buffer->cur = cur;
671 /* Subroutine of parse_string. */
673 unescaped_terminator_p (pfile, dest)
675 const unsigned char *dest;
677 const unsigned char *start, *temp;
679 /* In #include-style directives, terminators are not escapable. */
680 if (pfile->state.angled_headers)
683 start = BUFF_FRONT (pfile->u_buff);
685 /* An odd number of consecutive backslashes represents an escaped
687 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
690 return ((dest - temp) & 1) == 0;
693 /* Parses a string, character constant, or angle-bracketed header file
694 name. Handles embedded trigraphs and escaped newlines. The stored
695 string is guaranteed NUL-terminated, but it is not guaranteed that
696 this is the first NUL since embedded NULs are preserved.
698 When this function returns, buffer->cur points to the next
699 character to be processed. */
701 parse_string (pfile, token, terminator)
704 cppchar_t terminator;
706 cpp_buffer *buffer = pfile->buffer;
707 unsigned char *dest, *limit;
709 bool warned_nulls = false;
710 #ifdef MULTIBYTE_CHARS
715 dest = BUFF_FRONT (pfile->u_buff);
716 limit = BUFF_LIMIT (pfile->u_buff);
718 #ifdef MULTIBYTE_CHARS
719 /* Reset multibyte conversion state. */
720 (void) local_mbtowc (NULL, NULL, 0);
724 /* We need room for another char, possibly the terminating NUL. */
725 if ((size_t) (limit - dest) < 1)
727 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
728 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
729 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
730 limit = BUFF_LIMIT (pfile->u_buff);
733 #ifdef MULTIBYTE_CHARS
734 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
735 buffer->rlimit - buffer->cur);
738 cpp_error (pfile, DL_WARNING,
739 "ignoring invalid multibyte character");
745 buffer->cur += char_len;
752 /* Handle trigraphs, escaped newlines etc. */
753 if (c == '?' || c == '\\')
754 c = skip_escaped_newlines (pfile);
758 if (unescaped_terminator_p (pfile, dest))
761 else if (is_vspace (c))
763 /* No string literal may extend over multiple lines. In
764 assembly language, suppress the error except for <>
765 includes. This is a kludge around not knowing where
768 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
769 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
776 if (buffer->cur - 1 == buffer->rlimit)
781 cpp_error (pfile, DL_WARNING,
782 "null character(s) preserved in literal");
785 #ifdef MULTIBYTE_CHARS
788 for ( ; char_len > 0; --char_len)
789 *dest++ = (*buffer->cur - char_len);
798 token->val.str.text = BUFF_FRONT (pfile->u_buff);
799 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
800 BUFF_FRONT (pfile->u_buff) = dest + 1;
803 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
804 comment blocks (when executed with -C option) and
805 _asm (SDCPP specific) blocks */
807 /* Count and copy characters from src to dest, excluding CRs:
808 CRs are automatically generated, because the output is
809 opened in TEXT mode. If dest == NULL, only count chars */
811 copy_text_chars (dest, src, len)
819 for (p = src; p != src + len; ++p)
834 /* SDCC _asm specific */
835 /* The stored _asm block includes the initial '_asm' and any terminator. */
837 save_asm (pfile, token, from)
840 const unsigned char *from;
842 #define _ASM_STR "_asm"
843 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
845 unsigned char *buffer;
846 unsigned int text_len, len;
848 len = pfile->buffer->cur - from;
849 /* + _ASM_LEN for the initial '_asm'. */
850 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
851 buffer = _cpp_unaligned_alloc (pfile, text_len);
854 token->type = CPP_ASM;
855 token->val.str.len = text_len;
856 token->val.str.text = buffer;
858 memcpy (buffer, _ASM_STR, _ASM_LEN);
859 copy_text_chars (buffer + _ASM_LEN, from, len);
862 /* The stored comment includes the comment start and any terminator. */
864 save_comment (pfile, token, from, type)
867 const unsigned char *from;
870 unsigned char *buffer;
871 unsigned int len, clen;
873 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
875 /* C++ comments probably (not definitely) have moved past a new
876 line, which we don't want to save in the comment. */
877 if (is_vspace (pfile->buffer->cur[-1]))
880 /* If we are currently in a directive, then we need to store all
881 C++ comments as C comments internally, and so we need to
882 allocate a little extra space in that case.
884 Note that the only time we encounter a directive here is
885 when we are saving comments in a "#define". */
886 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
888 buffer = _cpp_unaligned_alloc (pfile, clen);
890 token->type = CPP_COMMENT;
891 token->val.str.len = clen;
892 token->val.str.text = buffer;
895 copy_text_chars (buffer + 1, from, len);
897 /* Finish conversion to a C comment, if necessary. */
898 if (pfile->state.in_directive && type == '/')
901 buffer[clen - 2] = '*';
902 buffer[clen - 1] = '/';
906 /* Allocate COUNT tokens for RUN. */
908 _cpp_init_tokenrun (run, count)
912 run->base = xnewvec (cpp_token, count);
913 run->limit = run->base + count;
917 /* Returns the next tokenrun, or creates one if there is none. */
922 if (run->next == NULL)
924 run->next = xnew (tokenrun);
925 run->next->prev = run;
926 _cpp_init_tokenrun (run->next, 250);
932 /* Allocate a single token that is invalidated at the same time as the
933 rest of the tokens on the line. Has its line and col set to the
934 same as the last lexed token, so that diagnostics appear in the
937 _cpp_temp_token (pfile)
940 cpp_token *old, *result;
942 old = pfile->cur_token - 1;
943 if (pfile->cur_token == pfile->cur_run->limit)
945 pfile->cur_run = next_tokenrun (pfile->cur_run);
946 pfile->cur_token = pfile->cur_run->base;
949 result = pfile->cur_token++;
950 result->line = old->line;
951 result->col = old->col;
955 /* Lex a token into RESULT (external interface). Takes care of issues
956 like directive handling, token lookahead, multiple include
957 optimization and skipping. */
959 _cpp_lex_token (pfile)
966 if (pfile->cur_token == pfile->cur_run->limit)
968 pfile->cur_run = next_tokenrun (pfile->cur_run);
969 pfile->cur_token = pfile->cur_run->base;
972 if (pfile->lookaheads)
975 result = pfile->cur_token++;
978 result = _cpp_lex_direct (pfile);
980 if (result->flags & BOL)
982 /* Is this a directive? If _cpp_handle_directive returns
983 false, it is an assembler #. */
984 if (result->type == CPP_HASH
985 /* 6.10.3 p 11: Directives in a list of macro arguments
986 give undefined behavior. This implementation
987 handles the directive as normal. */
988 && pfile->state.parsing_args != 1
989 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
991 if (pfile->cb.line_change && !pfile->state.skipping)
992 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
995 /* We don't skip tokens in directives. */
996 if (pfile->state.in_directive)
999 /* Outside a directive, invalidate controlling macros. At file
1000 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001 get here and MI optimisation works. */
1002 pfile->mi_valid = false;
1004 if (!pfile->state.skipping || result->type == CPP_EOF)
1011 /* A NUL terminates the current buffer. For ISO preprocessing this is
1012 EOF, but for traditional preprocessing it indicates we need a line
1013 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
1014 to return a CPP_EOF to the caller. */
1016 continue_after_nul (pfile)
1019 cpp_buffer *buffer = pfile->buffer;
1022 buffer->saved_flags = BOL;
1023 if (CPP_OPTION (pfile, traditional))
1025 if (pfile->state.in_directive)
1028 _cpp_remove_overlay (pfile);
1029 more = _cpp_read_logical_line_trad (pfile);
1030 _cpp_overlay_buffer (pfile, pfile->out.base,
1031 pfile->out.cur - pfile->out.base);
1032 pfile->line = pfile->out.first_line;
1036 /* Stop parsing arguments with a CPP_EOF. When we finally come
1037 back here, do the work of popping the buffer. */
1038 if (!pfile->state.parsing_args)
1040 if (buffer->cur != buffer->line_base)
1042 /* Non-empty files should end in a newline. Don't warn
1043 for command line and _Pragma buffers. */
1044 if (!buffer->from_stage3)
1045 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1046 handle_newline (pfile);
1049 /* Similarly, finish an in-progress directive with CPP_EOF
1050 before popping the buffer. */
1051 if (!pfile->state.in_directive && buffer->prev)
1053 more = !buffer->return_at_eof;
1054 _cpp_pop_buffer (pfile);
1062 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1064 if (get_effective_char (pfile) == CHAR) \
1065 result->type = THEN_TYPE; \
1069 result->type = ELSE_TYPE; \
1073 /* Lex a token into pfile->cur_token, which is also incremented, to
1074 get diagnostics pointing to the correct location.
1076 Does not handle issues such as token lookahead, multiple-include
1077 optimisation, directives, skipping etc. This function is only
1078 suitable for use by _cpp_lex_token, and in special cases like
1079 lex_expansion_token which doesn't care for any of these issues.
1081 When meeting a newline, returns CPP_EOF if parsing a directive,
1082 otherwise returns to the start of the token buffer if permissible.
1083 Returns the location of the lexed token. */
1085 _cpp_lex_direct (pfile)
1090 const unsigned char *comment_start;
1091 cpp_token *result = pfile->cur_token++;
1094 buffer = pfile->buffer;
1095 result->flags = buffer->saved_flags;
1096 buffer->saved_flags = 0;
1098 result->line = pfile->line;
1102 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1107 case ' ': case '\t': case '\f': case '\v': case '\0':
1108 result->flags |= PREV_WHITE;
1109 if (skip_whitespace (pfile, c))
1112 /* End of buffer. */
1114 if (continue_after_nul (pfile))
1116 result->type = CPP_EOF;
1119 case '\n': case '\r':
1120 handle_newline (pfile);
1121 buffer->saved_flags = BOL;
1122 if (! pfile->state.in_directive)
1124 if (pfile->state.parsing_args == 2)
1125 buffer->saved_flags |= PREV_WHITE;
1126 if (!pfile->keep_tokens)
1128 pfile->cur_run = &pfile->base_run;
1129 result = pfile->base_run.base;
1130 pfile->cur_token = result + 1;
1134 result->type = CPP_EOF;
1139 /* These could start an escaped newline, or '?' a trigraph. Let
1140 skip_escaped_newlines do all the work. */
1142 unsigned int line = pfile->line;
1144 c = skip_escaped_newlines (pfile);
1145 if (line != pfile->line)
1148 /* We had at least one escaped newline of some sort.
1149 Update the token's line and column. */
1150 goto update_tokens_line;
1154 /* We are either the original '?' or '\\', or a trigraph. */
1156 result->type = CPP_QUERY;
1163 case '0': case '1': case '2': case '3': case '4':
1164 case '5': case '6': case '7': case '8': case '9':
1165 result->type = CPP_NUMBER;
1166 parse_number (pfile, &result->val.str, 0);
1170 /* 'L' may introduce wide characters or strings. */
1172 const unsigned char *pos = buffer->cur;
1174 c = get_effective_char (pfile);
1175 if (c == '\'' || c == '"')
1177 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1178 parse_string (pfile, result, c);
1187 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1188 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1189 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1190 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1192 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1193 case 'G': case 'H': case 'I': case 'J': case 'K':
1194 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1195 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1197 result->type = CPP_NAME;
1198 result->val.node = parse_identifier (pfile);
1200 /* SDCC _asm specific */
1201 /* handle _asm ... _endasm ; */
1202 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1204 comment_start = buffer->cur;
1205 result->type = CPP_ASM;
1206 skip_asm_block (pfile);
1207 /* Save the _asm block as a token in its own right. */
1208 save_asm (pfile, result, comment_start);
1210 /* Convert named operators to their proper types. */
1211 else if (result->val.node->flags & NODE_OPERATOR)
1213 result->flags |= NAMED_OP;
1214 result->type = result->val.node->value.operator;
1220 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1221 parse_string (pfile, result, c);
1225 /* A potential block or line comment. */
1226 comment_start = buffer->cur;
1227 c = get_effective_char (pfile);
1231 if (skip_block_comment (pfile))
1232 cpp_error (pfile, DL_ERROR, "unterminated comment");
1234 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1235 || CPP_IN_SYSTEM_HEADER (pfile)))
1237 /* Warn about comments only if pedantically GNUC89, and not
1238 in system headers. */
1239 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1240 && ! buffer->warned_cplusplus_comments)
1242 cpp_error (pfile, DL_PEDWARN,
1243 "C++ style comments are not allowed in ISO C90");
1244 cpp_error (pfile, DL_PEDWARN,
1245 "(this will be reported only once per input file)");
1246 buffer->warned_cplusplus_comments = 1;
1249 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1250 cpp_error (pfile, DL_WARNING, "multi-line comment");
1254 result->type = CPP_DIV_EQ;
1260 result->type = CPP_DIV;
1264 if (!pfile->state.save_comments)
1266 result->flags |= PREV_WHITE;
1267 goto update_tokens_line;
1270 /* Save the comment as a token in its own right. */
1271 save_comment (pfile, result, comment_start, c);
1275 if (pfile->state.angled_headers)
1277 result->type = CPP_HEADER_NAME;
1278 parse_string (pfile, result, '>');
1282 c = get_effective_char (pfile);
1284 result->type = CPP_LESS_EQ;
1286 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1287 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1288 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1289 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1291 result->type = CPP_OPEN_SQUARE;
1292 result->flags |= DIGRAPH;
1294 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1296 result->type = CPP_OPEN_BRACE;
1297 result->flags |= DIGRAPH;
1302 result->type = CPP_LESS;
1307 c = get_effective_char (pfile);
1309 result->type = CPP_GREATER_EQ;
1311 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1312 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1313 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1317 result->type = CPP_GREATER;
1322 c = get_effective_char (pfile);
1324 result->type = CPP_MOD_EQ;
1325 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1327 result->flags |= DIGRAPH;
1328 result->type = CPP_HASH;
1329 if (get_effective_char (pfile) == '%')
1331 const unsigned char *pos = buffer->cur;
1333 if (get_effective_char (pfile) == ':')
1334 result->type = CPP_PASTE;
1336 buffer->cur = pos - 1;
1341 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1343 result->flags |= DIGRAPH;
1344 result->type = CPP_CLOSE_BRACE;
1349 result->type = CPP_MOD;
1354 result->type = CPP_DOT;
1355 c = get_effective_char (pfile);
1358 const unsigned char *pos = buffer->cur;
1360 if (get_effective_char (pfile) == '.')
1361 result->type = CPP_ELLIPSIS;
1363 buffer->cur = pos - 1;
1365 /* All known character sets have 0...9 contiguous. */
1366 else if (ISDIGIT (c))
1368 result->type = CPP_NUMBER;
1369 parse_number (pfile, &result->val.str, 1);
1371 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1372 result->type = CPP_DOT_STAR;
1378 c = get_effective_char (pfile);
1380 result->type = CPP_PLUS_PLUS;
1382 result->type = CPP_PLUS_EQ;
1386 result->type = CPP_PLUS;
1391 c = get_effective_char (pfile);
1394 result->type = CPP_DEREF;
1395 if (CPP_OPTION (pfile, cplusplus))
1397 if (get_effective_char (pfile) == '*')
1398 result->type = CPP_DEREF_STAR;
1404 result->type = CPP_MINUS_MINUS;
1406 result->type = CPP_MINUS_EQ;
1410 result->type = CPP_MINUS;
1415 c = get_effective_char (pfile);
1417 result->type = CPP_AND_AND;
1419 result->type = CPP_AND_EQ;
1423 result->type = CPP_AND;
1428 c = get_effective_char (pfile);
1430 result->type = CPP_OR_OR;
1432 result->type = CPP_OR_EQ;
1436 result->type = CPP_OR;
1441 c = get_effective_char (pfile);
1442 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1443 result->type = CPP_SCOPE;
1444 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1446 result->flags |= DIGRAPH;
1447 result->type = CPP_CLOSE_SQUARE;
1452 result->type = CPP_COLON;
1456 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1457 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1458 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1459 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1460 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1462 case '~': result->type = CPP_COMPL; break;
1463 case ',': result->type = CPP_COMMA; break;
1464 case '(': result->type = CPP_OPEN_PAREN; break;
1465 case ')': result->type = CPP_CLOSE_PAREN; break;
1466 case '[': result->type = CPP_OPEN_SQUARE; break;
1467 case ']': result->type = CPP_CLOSE_SQUARE; break;
1468 case '{': result->type = CPP_OPEN_BRACE; break;
1469 case '}': result->type = CPP_CLOSE_BRACE; break;
1470 case ';': result->type = CPP_SEMICOLON; break;
1472 /* @ is a punctuator in Objective-C. */
1473 case '@': result->type = CPP_ATSIGN; break;
1476 if (CPP_OPTION (pfile, dollars_in_ident))
1478 /* Fall through... */
1482 result->type = CPP_OTHER;
1490 /* An upper bound on the number of bytes needed to spell TOKEN,
1491 including preceding whitespace. */
/* The variable-length part is selected by the spelling category
   returned by TOKEN_SPELL.  Of the cases visible here, stored strings
   use the recorded string length, identifiers use the length of the
   identifier node, and every other category contributes zero.  */
1493 cpp_token_len (token)
1494 const cpp_token *token;
1498 switch (TOKEN_SPELL (token))
1500 default: len = 0; break;
1502 case SPELL_STRING: len = token->val.str.len; break;
1503 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1505 /* 1 for whitespace, 4 for comment delimiters. */
1509 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1510 already contain enough space to hold the token's spelling.
1511 Returns a pointer to the character after the last character
1514 cpp_spell_token (pfile, token, buffer)
1515 cpp_reader *pfile; /* Would be nice to be rid of this... */
1516 const cpp_token *token;
1517 unsigned char *buffer;
/* Dispatch on how this token type stores its spelling.  */
1519 switch (TOKEN_SPELL (token))
1521 case SPELL_OPERATOR:
1523 const unsigned char *spelling;
/* A digraph is spelt from digraph_spellings, indexed by the offset of
   the token type from CPP_FIRST_DIGRAPH.  */
1526 if (token->flags & DIGRAPH)
1528 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1529 else if (token->flags & NAMED_OP)
/* Table spelling for this token type.  */
1532 spelling = TOKEN_NAME (token);
/* Step through SPELLING one byte at a time until its NUL.  */
1534 while ((c = *spelling++) != '\0')
/* A token holding a single character writes that character.  */
1540 *buffer++ = token->val.c;
/* Identifier: copy the node name and advance BUFFER past it.  */
1545 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1546 buffer += NODE_LEN (token->val.node);
/* Stored text is copied verbatim, val.str.len bytes.  */
1550 memcpy (buffer, token->val.str.text, token->val.str.len);
1551 buffer += token->val.str.len;
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is the
   optional L prefix, all chosen from the token type.  */
1556 int left, right, tag;
1557 switch (token->type)
1559 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1560 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1561 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1562 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1563 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* A type not listed above is reported as an internal error (DL_ICE).  */
1565 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1566 TOKEN_NAME (token));
/* The prefix is written only when there is one.  */
1569 if (tag) *buffer++ = tag;
/* The contents of the literal follow, copied unchanged.  */
1571 memcpy (buffer, token->val.str.text, token->val.str.len);
1572 buffer += token->val.str.len;
/* Reached for a token that has no spelling: internal error.  */
1578 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1585 /* Returns TOKEN spelt as a null-terminated string. The string is
1586 freed when the reader is destroyed. Useful for diagnostics. */
/* The storage is obtained from _cpp_unaligned_alloc with the size
   reported by cpp_token_len, and is filled in by cpp_spell_token.  END
   receives the pointer cpp_spell_token returns.  */
1588 cpp_token_as_text (pfile, token)
1590 const cpp_token *token;
1592 unsigned int len = cpp_token_len (token);
1593 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1595 end = cpp_spell_token (pfile, token, start);
1601 /* Used by C front ends, which really should move to using
1602 cpp_token_as_text. */
/* Returns the name field of the token_spellings entry for TYPE, cast
   to const char *.  TYPE indexes the table directly; no range check is
   visible here.  */
1604 cpp_type2name (type)
1605 enum cpp_ttype type;
1607 return (const char *) token_spellings[type].name;
1610 /* Writes the spelling of token to FP, without any preceding space.
1611 Separated from cpp_spell_token for efficiency - to avoid stdio
1612 double-buffering. */
/* TOKEN is the token to write and FP the destination stream.  The
   output form is chosen by the spelling category of the token.  */
1614 cpp_output_token (token, fp)
1615 const cpp_token *token;
1618 switch (TOKEN_SPELL (token))
1620 case SPELL_OPERATOR:
1622 const unsigned char *spelling;
/* A digraph is spelt from digraph_spellings, indexed by the offset of
   the token type from CPP_FIRST_DIGRAPH.  */
1625 if (token->flags & DIGRAPH)
1627 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1628 else if (token->flags & NAMED_OP)
/* Table spelling for this token type.  */
1631 spelling = TOKEN_NAME (token);
/* Advance to the next byte of SPELLING; stop at its NUL.  */
1636 while ((c = *++spelling) != '\0');
/* A token holding a single character writes that character.  */
1641 putc (token->val.c, fp);
/* Identifier: write the node name, NODE_LEN bytes.  */
1646 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Stored text is written verbatim.  */
1650 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is the
   optional L prefix.  A zero value means nothing is written for that
   position, which is how CPP_ASM is emitted without delimiters.  */
1655 int left, right, tag;
1656 switch (token->type)
1658 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1659 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1660 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1661 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1662 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1663 case CPP_ASM: left = '\0'; right = '\0'; tag = '\0'; break;
/* This function receives no cpp_reader, so the diagnostic is printed
   straight to stderr rather than through cpp_error.  */
1665 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1668 if (tag) putc (tag, fp);
1669 if (left) putc (left, fp);
1670 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1671 if (right) putc (right, fp);
1676 /* An error, most probably. */
1681 /* Compare two tokens. */
/* A and B are the tokens to compare.  Type and flags must match before
   anything else is examined; the remaining test depends on the spelling
   category of A.  */
1683 _cpp_equiv_tokens (a, b)
1684 const cpp_token *a, *b;
1686 if (a->type == b->type && a->flags == b->flags)
1687 switch (TOKEN_SPELL (a))
1689 default: /* Keep compiler happy. */
1690 case SPELL_OPERATOR:
1693 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must also carry the same argument number.  */
1695 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Identifiers are compared by node pointer, not by text.  */
1697 return a->val.node == b->val.node;
/* Stored text: same length and identical bytes.  */
1700 return (a->val.str.len == b->val.str.len
1701 && !memcmp (a->val.str.text, b->val.str.text,
1708 /* Returns nonzero if a space should be inserted to avoid an
1709 accidental token paste for output. For simplicity, it is
1710 conservative, and occasionally advises a space where one is not
1711 needed, e.g. "." and ".2". */
/* TOKEN1 is the token already output and TOKEN2 the one that follows.
   A and B are their types.  */
1713 cpp_avoid_paste (pfile, token1, token2)
1715 const cpp_token *token1, *token2;
1717 enum cpp_ttype a = token1->type, b = token2->type;
1720 if (token1->flags & NAMED_OP)
1722 if (token2->flags & NAMED_OP)
/* C becomes the first character of the spelling of TOKEN2 when that
   token is a digraph or an operator.  */
1726 if (token2->flags & DIGRAPH)
1727 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1728 else if (token_spellings[b].category == SPELL_OPERATOR)
1729 c = token_spellings[b].name[0];
1731 /* Quickly get everything that can paste with an '='. */
1732 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below names the leading characters (or token types) of
   TOKEN2 that are treated as able to fuse with TOKEN1.  */
1737 case CPP_GREATER: return c == '>' || c == '?';
1738 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1739 case CPP_PLUS: return c == '+';
1740 case CPP_MINUS: return c == '-' || c == '>';
1741 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1742 case CPP_MOD: return c == ':' || c == '>';
1743 case CPP_AND: return c == '&';
1744 case CPP_OR: return c == '|';
1745 case CPP_COLON: return c == ':' || c == '>';
1746 case CPP_DEREF: return c == '*';
1747 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1748 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1749 case CPP_NAME: return ((b == CPP_NUMBER
1750 && name_p (pfile, &token2->val.str))
1752 || b == CPP_CHAR || b == CPP_STRING); /* L */
1753 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1754 || c == '.' || c == '+' || c == '-');
/* Only when the objc option is set: an at-sign held in a CPP_OTHER
   token is kept apart from a following name or string.  */
1755 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1756 && token1->val.c == '@'
1757 && (b == CPP_NAME || b == CPP_STRING));
1764 /* Output all the remaining tokens on the current line, and a newline
1765 character, to FP. Leading whitespace is removed. If there are
1766 macros, special token padding is not performed. */
/* Tokens come from cpp_get_token and are written with cpp_output_token
   until a CPP_EOF token is returned.  */
1768 cpp_output_line (pfile, fp)
1772 const cpp_token *token;
1774 token = cpp_get_token (pfile);
1775 while (token->type != CPP_EOF)
1777 cpp_output_token (token, fp);
1778 token = cpp_get_token (pfile);
/* PREV_WHITE is tested on the token just fetched, i.e. after the
   previous token has already been written.  */
1779 if (token->flags & PREV_WHITE)
1786 /* Returns the value of a hexadecimal digit. */
/* The result is whatever hex_value yields for C; no validation of C is
   visible here.  */
1792 return hex_value (c);
1797 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1798 failure if cpplib is not parsing C++ or C99. Such failure is
1799 silent, and no variables are updated. Otherwise returns 0, and
1800 warns if -Wtraditional.
1802 [lex.charset]: The character designated by the universal character
1803 name \UNNNNNNNN is that character whose character short name in
1804 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1805 universal character name \uNNNN is that character whose character
1806 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1807 for a universal character name is less than 0x20 or in the range
1808 0x7F-0x9F (inclusive), or if the universal character name
1809 designates a character in the basic source character set, then the
1810 program is ill-formed.
1812 We assume that wchar_t is Unicode, so we don't need to do any
1813 mapping. Is this ever wrong?
1815 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1816 LIMIT is the end of the string or charconst. PSTR is updated to
1817 point after the UCS on return, and the UCS is written into PC. */
1820 maybe_read_ucs (pfile, pstr, limit, pc)
1822 const unsigned char **pstr;
1823 const unsigned char *limit;
/* P is a working copy of the input position, CODE accumulates the
   value, and C starts as the introducing letter read through PC.  */
1826 const unsigned char *p = *pstr;
1827 unsigned int code = 0;
1828 unsigned int c = *pc, length;
1830 /* Only attempt to interpret a UCS for C++ and C99. */
1831 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1834 if (CPP_WTRADITIONAL (pfile))
1835 cpp_error (pfile, DL_WARNING,
1836 "the meaning of '\\%c' is different in traditional C", c);
/* The lower-case form takes four hex digits, the upper-case form
   eight.  */
1838 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1840 if ((size_t) (limit - p) < length)
1842 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1843 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down as digits are consumed, so it is zero afterwards
   only if the loop ran to completion.  */
1848 for (; length; length--, p++)
/* Each hex digit adds four bits at the low end of CODE.  */
1852 code = (code << 4) + hex_digit_value (c);
1855 cpp_error (pfile, DL_ERROR,
1856 "non-hex digit '%c' in universal-character-name", c);
1857 /* We shouldn't skip in case there are multibyte chars. */
1863 #ifdef TARGET_EBCDIC
1864 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1865 code = 0x3f; /* EBCDIC invalid character */
1867 /* True extended characters are OK. */
/* The visible part of this test excludes values with bit 31 set and
   the range 0xD800-0xDFFF (the UTF-16 surrogate code points).  */
1869 && !(code & 0x80000000)
1870 && !(code >= 0xD800 && code <= 0xDFFF))
1872 /* The standard permits $, @ and ` to be specified as UCNs. We use
1873 hex escapes so that this also works with EBCDIC hosts. */
1874 else if (code == 0x24 || code == 0x40 || code == 0x60)
1876 /* Don't give another error if one occurred above. */
1877 else if (length == 0)
1878 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1886 /* Returns the value of an escape sequence, truncated to the correct
1887 target precision. PSTR points to the input pointer, which is just
1888 after the backslash. LIMIT is how much text we have. WIDE is true
1889 if the escape sequence is part of a wide character constant or
1890 string literal. Handles all relevant diagnostics. */
1892 cpp_parse_escape (pfile, pstr, limit, wide)
1894 const unsigned char **pstr;
1895 const unsigned char *limit;
1899 const unsigned char *str = *pstr;
/* WIDTH is the target precision in bits, taken from either the
   wchar_precision or the char_precision option.  */
1904 width = CPP_OPTION (pfile, wchar_precision);
1906 width = CPP_OPTION (pfile, char_precision);
/* When cppchar_t is wider than the target type, MASK keeps only the
   low WIDTH bits.  */
1907 if (width < BITS_PER_CPPCHAR_T)
1908 mask = ((cppchar_t) 1 << width) - 1;
/* These escapes stand for the character itself.  */
1915 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the target value of the control character.  */
1916 case 'b': c = TARGET_BS; break;
1917 case 'f': c = TARGET_FF; break;
1918 case 'n': c = TARGET_NEWLINE; break;
1919 case 'r': c = TARGET_CR; break;
1920 case 't': c = TARGET_TAB; break;
1921 case 'v': c = TARGET_VT; break;
1923 case '(': case '{': case '[': case '%':
1924 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1925 '\%' is used to prevent SCCS from getting confused. */
/* These are flagged as unknown only in pedantic mode.  */
1926 unknown = CPP_PEDANTIC (pfile);
1930 if (CPP_WTRADITIONAL (pfile))
1931 cpp_error (pfile, DL_WARNING,
1932 "the meaning of '\\a' is different in traditional C");
1937 if (CPP_PEDANTIC (pfile))
1938 cpp_error (pfile, DL_PEDWARN,
1939 "non-ISO-standard escape sequence, '\\%c'", (int) c);
/* Universal character names are handed to maybe_read_ucs, whose
   nonzero result marks the escape as unknown.  */
1944 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1948 if (CPP_WTRADITIONAL (pfile))
1949 cpp_error (pfile, DL_WARNING,
1950 "the meaning of '\\x' is different in traditional C");
/* Hex escape: I accumulates the value, OVERFLOW collects any bits that
   would be lost off the top of I by the next four-bit shift.  */
1953 cppchar_t i = 0, overflow = 0;
1954 int digits_found = 0;
1962 overflow |= i ^ (i << 4 >> 4);
1963 i = (i << 4) + hex_digit_value (c);
1968 cpp_error (pfile, DL_ERROR,
1969 "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit MASK.  */
1971 if (overflow | (i != (i & mask)))
1973 cpp_error (pfile, DL_PEDWARN,
1974 "hex escape sequence out of range");
1981 case '0': case '1': case '2': case '3':
1982 case '4': case '5': case '6': case '7':
/* Octal escape: the first digit has already been read into C.  */
1985 cppchar_t i = c - '0';
/* NOTE(review): the initial value of COUNT is not visible in this
   listing; presumably the loop limits the escape to three digits in
   total - confirm.  */
1987 while (str < limit && ++count < 3)
1990 if (c < '0' || c > '7')
1993 i = (i << 3) + c - '0';
1996 if (i != (i & mask))
1998 cpp_error (pfile, DL_PEDWARN,
1999 "octal escape sequence out of range");
/* Two forms of the unknown-escape diagnostic: one prints the character
   itself, the other its value as three octal digits.  */
2014 cpp_error (pfile, DL_PEDWARN,
2015 "unknown escape sequence '\\%c'", (int) c);
2017 cpp_error (pfile, DL_PEDWARN,
2018 "unknown escape sequence: '\\%03o'", (int) c);
2023 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2031 /* Interpret a (possibly wide) character constant in TOKEN.
2032 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
2033 points to a variable that is filled in with the number of
2034 characters seen, and UNSIGNEDP to a variable that indicates whether
2035 the result has signed type. */
/* NOTE(review): WARN_MULTI is not a parameter of the definition below;
   the multi-character warning is governed by the warn_multichar option
   read through CPP_OPTION.  The header comment looks stale - confirm.  */
2037 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2039 const cpp_token *token;
2040 unsigned int *pchars_seen;
/* STR and LIMIT delimit the text of the constant held in TOKEN.  */
2043 const unsigned char *str = token->val.str.text;
2044 const unsigned char *limit = str + token->val.str.len;
2045 unsigned int chars_seen = 0;
2046 size_t width, max_chars;
2047 cppchar_t c, mask, result = 0;
2050 #ifdef MULTIBYTE_CHARS
2051 (void) local_mbtowc (NULL, NULL, 0);
2054 /* Width in bits. */
2055 if (token->type == CPP_CHAR)
/* For a narrow constant, MAX_CHARS is the number of characters of
   WIDTH bits that fit in the target int.  */
2057 width = CPP_OPTION (pfile, char_precision);
2058 max_chars = CPP_OPTION (pfile, int_precision) / width;
2059 unsigned_p = CPP_OPTION (pfile, unsigned_char);
2063 width = CPP_OPTION (pfile, wchar_precision);
2065 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
/* When cppchar_t is wider than the target type, MASK keeps only the
   low WIDTH bits.  */
2068 if (width < BITS_PER_CPPCHAR_T)
2069 mask = ((cppchar_t) 1 << width) - 1;
2075 #ifdef MULTIBYTE_CHARS
2079 char_len = local_mbtowc (&wc, str, limit - str);
2082 cpp_error (pfile, DL_WARNING,
2083 "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which is told
   whether this is a wide constant.  */
2096 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2098 #ifdef MAP_CHARACTER
2100 c = MAP_CHARACTER (c);
2105 /* Truncate the character, scale the result and merge the two. */
2107 if (width < BITS_PER_CPPCHAR_T)
2108 result = (result << width) | c;
2113 if (chars_seen == 0)
2114 cpp_error (pfile, DL_ERROR, "empty character constant");
2115 else if (chars_seen > 1)
2117 /* Multichar charconsts are of type int and therefore signed. */
/* The count reported to the caller is clamped to MAX_CHARS.  */
2120 if (chars_seen > max_chars)
2122 chars_seen = max_chars;
2123 cpp_error (pfile, DL_WARNING,
2124 "character constant too long for its type");
2126 else if (CPP_OPTION (pfile, warn_multichar))
2127 cpp_error (pfile, DL_WARNING, "multi-character character constant");
2130 /* Sign-extend or truncate the constant to cppchar_t. The value is
2131 in WIDTH bits, but for multi-char charconsts its value is the
2132 full target type's width. */
2135 if (width < BITS_PER_CPPCHAR_T)
2137 mask = ((cppchar_t) 1 << width) - 1;
/* True when the type is unsigned or the top bit of the WIDTH-bit value
   is clear.
   NOTE(review): the 1 shifted here is a plain int, whereas MASK is
   built from (cppchar_t) 1; confirm WIDTH - 1 always stays within the
   shift range of int.  */
2138 if (unsigned_p || !(result & (1 << (width - 1))))
/* Hand the character count and signedness back through the pointers.  */
2144 *pchars_seen = chars_seen;
2145 *unsignedp = unsigned_p;
2149 /* Memory buffers. Changing these three constants can have a dramatic
2150 effect on performance. The values here are reasonable defaults,
2151 but might be tuned. If you adjust them, be sure to test across a
2152 range of uses of cpplib, including heavy nested function-like macro
2153 expansion. Also check the change in peak memory usage (NJAMD is a
2154 good tool for this). */
/* MIN_BUFF_SIZE is the floor applied to the size of a new buffer.  */
2155 #define MIN_BUFF_SIZE 8000
/* Largest existing buffer still accepted for a request of MIN_SIZE:
   one and a half times the request plus MIN_BUFF_SIZE.  */
2156 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the bytes
   between CUR and LIMIT.
   NOTE(review): MIN_EXTRA is not parenthesized in the expansion; the
   callers visible in this file pass a plain identifier, but an
   expression argument with a low-precedence operator would misparse.  */
2157 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2158 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check on the two constants above.  */
2160 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2161 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2164 /* Create a new allocation buffer. Place the control block at the end
2165 of the buffer, so that buffer overflows will cause immediate chaos. */
2171 unsigned char *base;
/* The request is raised to MIN_BUFF_SIZE if smaller, then passed
   through CPP_ALIGN.  */
2173 if (len < MIN_BUFF_SIZE)
2174 len = MIN_BUFF_SIZE;
2175 len = CPP_ALIGN (len);
/* A single allocation holds LEN data bytes followed by the _cpp_buff
   control block, which therefore starts at BASE + LEN.  */
2177 base = xmalloc (len + sizeof (_cpp_buff));
2178 result = (_cpp_buff *) (base + len);
2179 result->base = base;
/* LIMIT coincides with the address of the control block.  */
2181 result->limit = base + len;
2182 result->next = NULL;
2186 /* Place a chain of unwanted allocation buffers on the free list. */
2188 _cpp_release_buff (pfile, buff)
/* END is meant to reach the last buffer of the chain; the code that
   advances it is not visible in this listing.  */
2192 _cpp_buff *end = buff;
/* Splice: the old free list hangs off END, and BUFF becomes the new
   head of pfile->free_buffs.  */
2196 end->next = pfile->free_buffs;
2197 pfile->free_buffs = buff;
2200 /* Return a free buffer of size at least MIN_SIZE. */
/* Candidates are taken from pfile->free_buffs; new_buff supplies a
   fresh buffer on the fallback path.  */
2202 _cpp_get_buff (pfile, min_size)
2206 _cpp_buff *result, **p;
/* P addresses each link of the free list in turn.  */
2208 for (p = &pfile->free_buffs;; p = &(*p)->next)
2213 return new_buff (min_size);
/* SIZE is the capacity of the candidate.  The test further down accepts
   it only when SIZE lies between MIN_SIZE and BUFF_SIZE_UPPER_BOUND of
   MIN_SIZE, inclusive.  The buffer handed back has its NEXT link
   cleared and CUR reset to BASE.  */
2215 size = result->limit - result->base;
2216 /* Return a buffer that's big enough, but don't waste one that's
2218 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2223 result->next = NULL;
2224 result->cur = result->base;
2228 /* Creates a new buffer with enough space to hold the uncommitted
2229 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2230 the excess bytes to the new buffer. Chains the new buffer after
2231 BUFF, and returns the new buffer. */
2233 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The request is sized by EXTENDED_BUFF_SIZE and served by
   _cpp_get_buff.  */
2238 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2239 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link first, then copy BUFF_ROOM (buff) bytes starting at the CUR of
   the old buffer to the base of the new one.  BUFF_ROOM is defined
   outside this listing; presumably it is LIMIT minus CUR - confirm.  */
2241 buff->next = new_buff;
2242 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2246 /* Creates a new buffer with enough space to hold the uncommitted
2247 remaining bytes of the buffer pointed to by BUFF, and at least
2248 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2249 Chains the new buffer before the buffer pointed to by BUFF, and
2250 updates the pointer to point to the new buffer. */
2252 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer currently addressed by PBUFF.  */
2257 _cpp_buff *new_buff, *old_buff = *pbuff;
2258 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Copy BUFF_ROOM (old_buff) bytes from the CUR of the old buffer to the
   base of the new one, then put the old buffer behind the new one.
   BUFF_ROOM is defined outside this listing; presumably it is LIMIT
   minus CUR - confirm.  */
2260 new_buff = _cpp_get_buff (pfile, size);
2261 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2262 new_buff->next = old_buff;
2266 /* Free a chain of buffers starting at BUFF. */
2268 _cpp_free_buff (buff)
/* The loop steps through a separate NEXT variable rather than reading
   buff->next in the increment; the loop body is not visible in this
   listing.  */
2273 for (; buff; buff = next)
2280 /* Allocate permanent, unaligned storage of length LEN. */
2282 _cpp_unaligned_alloc (pfile, len)
/* Allocation starts at the CUR of the current pfile->u_buff.  */
2286 _cpp_buff *buff = pfile->u_buff;
2287 unsigned char *result = buff->cur;
/* Not enough room left: fetch a buffer of at least LEN bytes and make
   it the new head of the u_buff chain, keeping the old one behind it.  */
2289 if (len > (size_t) (buff->limit - result))
2291 buff = _cpp_get_buff (pfile, len);
2292 buff->next = pfile->u_buff;
2293 pfile->u_buff = buff;
/* Commit LEN bytes by advancing CUR.  */
2297 buff->cur = result + len;
2301 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2302 That buffer is used for growing allocations when saving macro
2303 replacement lists in a #define, and when parsing an answer to an
2304 assertion in #assert, #unassert or #if (and therefore possibly
2305 whilst expanding macros). It therefore must not be used by any
2306 code that they might call: specifically the lexer and the guts of
2309 All existing other uses clearly fit this restriction: storing
2310 registered pragmas during initialization. */
/* NOTE(review): the header above says "unaligned" although the function
   is named _cpp_aligned_alloc and draws from a_buff; the wording looks
   copied from _cpp_unaligned_alloc - confirm and correct.  */
2312 _cpp_aligned_alloc (pfile, len)
/* Allocation starts at the CUR of the current pfile->a_buff.  */
2316 _cpp_buff *buff = pfile->a_buff;
2317 unsigned char *result = buff->cur;
/* Not enough room left: fetch a buffer of at least LEN bytes and make
   it the new head of the a_buff chain, keeping the old one behind it.  */
2319 if (len > (size_t) (buff->limit - result))
2321 buff = _cpp_get_buff (pfile, len);
2322 buff->next = pfile->a_buff;
2323 pfile->a_buff = buff;
/* Commit LEN bytes by advancing CUR.  */
2327 buff->cur = result + len;