1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #ifdef MULTIBYTE_CHARS
34 /* Tokens with SPELL_STRING store their spelling in the token list,
35 and its length in token->val.str.len. */
48 enum spell_type category;
49 const unsigned char *name;
52 static const unsigned char *const digraph_spellings[] =
53 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
55 #define OP(e, s) { SPELL_OPERATOR, U s },
56 #define TK(e, s) { s, U STRINGX (e) },
57 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
61 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
62 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
63 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
65 static void handle_newline PARAMS ((cpp_reader *));
66 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
67 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
69 static int skip_asm_block PARAMS ((cpp_reader *));
70 static int skip_block_comment PARAMS ((cpp_reader *));
71 static int skip_line_comment PARAMS ((cpp_reader *));
72 static void adjust_column PARAMS ((cpp_reader *));
73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
80 static bool trigraph_p PARAMS ((cpp_reader *));
81 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
82 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
83 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
85 static bool continue_after_nul PARAMS ((cpp_reader *));
86 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
87 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
88 const unsigned char *, cppchar_t *));
89 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
91 static unsigned int hex_digit_value PARAMS ((unsigned int));
92 static _cpp_buff *new_buff PARAMS ((size_t));
96 Compares the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
99 cpp_ideq (token, string)
100 const cpp_token *token;
103 if (token->type != CPP_NAME)
106 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
109 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
110 Returns with buffer->cur pointing to the character immediately
111 following the newline (combination). */
113 handle_newline (pfile)
116 cpp_buffer *buffer = pfile->buffer;
118 /* Handle CR-LF and LF-CR. Most other implementations (e.g. Java)
119 only accept CR-LF; maybe we should fall back to that behavior? */
120 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
123 buffer->line_base = buffer->cur;
124 buffer->col_adjust = 0;
128 /* Subroutine of skip_escaped_newlines; called when a 3-character
129 sequence beginning with "??" is encountered. buffer->cur points to
132 Warns if necessary, and returns true if the sequence forms a
133 trigraph and the trigraph should be honored. */
138 cpp_buffer *buffer = pfile->buffer;
139 cppchar_t from_char = buffer->cur[1];
142 if (!_cpp_trigraph_map[from_char])
145 accept = CPP_OPTION (pfile, trigraphs);
147 /* Don't warn about trigraphs in comments. */
148 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
151 cpp_error_with_line (pfile, DL_WARNING,
152 pfile->line, CPP_BUF_COL (buffer) - 1,
153 "trigraph ??%c converted to %c",
155 (int) _cpp_trigraph_map[from_char]);
156 else if (buffer->cur != buffer->last_Wtrigraphs)
158 buffer->last_Wtrigraphs = buffer->cur;
159 cpp_error_with_line (pfile, DL_WARNING,
160 pfile->line, CPP_BUF_COL (buffer) - 1,
161 "trigraph ??%c ignored", (int) from_char);
168 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
169 lie in buffer->cur[-1]. Returns the next byte, which will be in
170 buffer->cur[-1]. This routine performs preprocessing stages 1 and
171 2 of the ISO C standard. */
173 skip_escaped_newlines (pfile)
176 cpp_buffer *buffer = pfile->buffer;
177 cppchar_t next = buffer->cur[-1];
179 /* Only do this if we apply stages 1 and 2. */
180 if (!buffer->from_stage3)
182 const unsigned char *saved_cur;
189 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
192 /* Translate the trigraph. */
193 next = _cpp_trigraph_map[buffer->cur[1]];
199 if (buffer->cur == buffer->rlimit)
202 /* We have a backslash, and room for at least one more
203 character. Skip horizontal whitespace. */
204 saved_cur = buffer->cur;
206 next1 = *buffer->cur++;
207 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
209 if (!is_vspace (next1))
211 buffer->cur = saved_cur;
215 if (saved_cur != buffer->cur - 1
216 && !pfile->state.lexing_comment)
217 cpp_error (pfile, DL_WARNING,
218 "backslash and newline separated by space");
220 handle_newline (pfile);
221 buffer->backup_to = buffer->cur;
222 if (buffer->cur == buffer->rlimit)
224 cpp_error (pfile, DL_PEDWARN,
225 "backslash-newline at end of file");
229 next = *buffer->cur++;
231 while (next == '\\' || next == '?');
237 /* Obtain the next character, after trigraph conversion and skipping
238 an arbitrarily long string of escaped newlines. The common case of
239 no trigraphs or escaped newlines falls through quickly. On return,
240 buffer->backup_to points to where to return to if the character is
241 not to be processed. */
243 get_effective_char (pfile)
247 cpp_buffer *buffer = pfile->buffer;
249 buffer->backup_to = buffer->cur;
250 next = *buffer->cur++;
251 if (__builtin_expect (next == '?' || next == '\\', 0))
252 next = skip_escaped_newlines (pfile);
257 /* SDCC _asm specific */
258 /* Skip an _asm ... _endasm block. We find the end of the block by
259 seeing _endasm. Returns nonzero if _asm terminated by EOF, zero
262 skip_asm_block (pfile)
265 #define _ENDASM_STR "endasm"
266 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
268 cpp_buffer *buffer = pfile->buffer;
273 pfile->state.lexing_comment = 1;
274 while (buffer->cur != buffer->rlimit)
276 prev_space = is_space(c);
279 /* FIXME: For speed, create a new character class of characters
280 of interest inside block comments. */
281 if (c == '?' || c == '\\')
282 c = skip_escaped_newlines (pfile);
284 if (prev_space && c == '_')
286 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
287 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
289 buffer->cur += _ENDASM_LEN;
294 else if (is_vspace (c))
296 prev_space = is_space(c);
297 handle_newline (pfile);
300 adjust_column (pfile);
303 pfile->state.lexing_comment = 0;
307 /* Skip a C-style block comment. We find the end of the comment by
308 seeing if an asterisk is before every '/' we encounter. Returns
309 nonzero if comment terminated by EOF, zero otherwise. */
311 skip_block_comment (pfile)
314 cpp_buffer *buffer = pfile->buffer;
315 cppchar_t c = EOF, prevc = EOF;
317 pfile->state.lexing_comment = 1;
318 while (buffer->cur != buffer->rlimit)
320 prevc = c, c = *buffer->cur++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c == '?' || c == '\\')
325 c = skip_escaped_newlines (pfile);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile, warn_comments)
338 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
339 cpp_error_with_line (pfile, DL_WARNING,
340 pfile->line, CPP_BUF_COL (buffer),
341 "\"/*\" within comment");
343 else if (is_vspace (c))
344 handle_newline (pfile);
346 adjust_column (pfile);
349 pfile->state.lexing_comment = 0;
350 return c != '/' || prevc != '*';
353 /* Skip a C++ line comment, leaving buffer->cur pointing to the
354 terminating newline. Handles escaped newlines. Returns nonzero
355 if a multiline comment. */
357 skip_line_comment (pfile)
360 cpp_buffer *buffer = pfile->buffer;
361 unsigned int orig_line = pfile->line;
363 #ifdef MULTIBYTE_CHARS
368 pfile->state.lexing_comment = 1;
369 #ifdef MULTIBYTE_CHARS
370 /* Reset multibyte conversion state. */
371 (void) local_mbtowc (NULL, NULL, 0);
375 if (buffer->cur == buffer->rlimit)
378 #ifdef MULTIBYTE_CHARS
379 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
380 buffer->rlimit - buffer->cur);
383 cpp_error (pfile, DL_WARNING,
384 "ignoring invalid multibyte character");
390 buffer->cur += char_len;
396 if (c == '?' || c == '\\')
397 c = skip_escaped_newlines (pfile);
399 while (!is_vspace (c));
401 /* Step back over the newline, except at EOF. */
405 pfile->state.lexing_comment = 0;
406 return orig_line != pfile->line;
409 /* pfile->buffer->cur is one beyond the \t character. Update
410 col_adjust so we track the column correctly. */
412 adjust_column (pfile)
415 cpp_buffer *buffer = pfile->buffer;
416 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
418 /* Round it up to a multiple of the tabstop, but subtract 1 since the
419 tab itself occupies a character position. */
420 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
421 - col % CPP_OPTION (pfile, tabstop)) - 1;
424 /* Skips whitespace, saving the next non-whitespace character.
425 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
426 tokens might be assigned an incorrect column. */
428 skip_whitespace (pfile, c)
432 cpp_buffer *buffer = pfile->buffer;
433 unsigned int warned = 0;
437 /* Horizontal space always OK. */
441 adjust_column (pfile);
442 /* Just \f \v or \0 left. */
445 if (buffer->cur - 1 == buffer->rlimit)
449 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
453 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
454 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
455 CPP_BUF_COL (buffer),
456 "%s in preprocessing directive",
457 c == '\f' ? "form feed" : "vertical tab");
461 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
462 while (is_nvspace (c));
468 /* See if the characters of a number token are valid in a name (no
471 name_p (pfile, string)
473 const cpp_string *string;
477 for (i = 0; i < string->len; i++)
478 if (!is_idchar (string->text[i]))
484 /* Parse an identifier, skipping embedded backslash-newlines. This is
485 a critical inner loop. The common case is an identifier which has
486 not been split by backslash-newline, does not contain a dollar
487 sign, and has already been scanned (roughly 10:1 ratio of
488 seen:unseen identifiers in normal code; the distribution is
489 Poisson-like). Second most common case is a new identifier, not
490 split and no dollar sign. The other possibilities are rare and
491 have been relegated to parse_slow. */
492 static cpp_hashnode *
493 parse_identifier (pfile)
496 cpp_hashnode *result;
497 const uchar *cur, *base;
499 /* Fast-path loop. Skim over a normal identifier.
500 N.B. ISIDNUM does not include $. */
501 cur = pfile->buffer->cur;
502 while (ISIDNUM (*cur))
505 /* Check for slow-path cases. */
506 if (*cur == '?' || *cur == '\\' || *cur == '$')
510 base = parse_slow (pfile, cur, 0, &len);
511 result = (cpp_hashnode *)
512 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
516 base = pfile->buffer->cur - 1;
517 pfile->buffer->cur = cur;
518 result = (cpp_hashnode *)
519 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
522 /* Rarely, identifiers require diagnostics when lexed.
523 XXX Has to be forced out of the fast path. */
524 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
525 && !pfile->state.skipping, 0))
527 /* It is allowed to poison the same identifier twice. */
528 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
529 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
532 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
533 replacement list of a variadic macro. */
534 if (result == pfile->spec_nodes.n__VA_ARGS__
535 && !pfile->state.va_args_ok)
536 cpp_error (pfile, DL_PEDWARN,
537 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
543 /* Slow path. This handles numbers and identifiers which have been
544 split, or contain dollar signs. The part of the token from
545 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
546 1 if it's a number, and 2 if it has a leading period. Returns a
547 pointer to the token's NUL-terminated spelling in permanent
548 storage, and sets PLEN to its length. */
550 parse_slow (pfile, cur, number_p, plen)
556 cpp_buffer *buffer = pfile->buffer;
557 const uchar *base = buffer->cur - 1;
558 struct obstack *stack = &pfile->hash_table->stack;
559 unsigned int c, prevc, saw_dollar = 0;
561 /* Place any leading period. */
563 obstack_1grow (stack, '.');
565 /* Copy the part of the token which is known to be okay. */
566 obstack_grow (stack, base, cur - base);
568 /* Now process the part which isn't. We are looking at one of
569 '$', '\\', or '?' on entry to this loop. */
575 /* Potential escaped newline? */
576 buffer->backup_to = buffer->cur - 1;
577 if (c == '?' || c == '\\')
578 c = skip_escaped_newlines (pfile);
584 if (c != '.' && !VALID_SIGN (c, prevc))
588 /* Handle normal identifier characters in this loop. */
592 obstack_1grow (stack, c);
599 while (is_idchar (c));
602 /* Step back over the unwanted char. */
605 /* $ is not an identifier character in the standard, but is commonly
606 accepted as an extension. Don't warn about it in skipped
607 conditional blocks. */
608 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
609 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
611 /* Identifiers and numbers are null-terminated. */
612 *plen = obstack_object_size (stack);
613 obstack_1grow (stack, '\0');
614 return obstack_finish (stack);
617 /* Parse a number, beginning with character C, skipping embedded
618 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
619 before C. Place the result in NUMBER. */
621 parse_number (pfile, number, leading_period)
628 /* Fast-path loop. Skim over a normal number.
629 N.B. ISIDNUM does not include $. */
630 cur = pfile->buffer->cur;
631 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
634 /* Check for slow-path cases. */
635 if (*cur == '?' || *cur == '\\' || *cur == '$')
636 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
639 const uchar *base = pfile->buffer->cur - 1;
642 number->len = cur - base + leading_period;
643 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
644 dest[number->len] = '\0';
649 memcpy (dest, base, cur - base);
650 pfile->buffer->cur = cur;
654 /* Subroutine of parse_string. */
656 unescaped_terminator_p (pfile, dest)
658 const unsigned char *dest;
660 const unsigned char *start, *temp;
662 /* In #include-style directives, terminators are not escapable. */
663 if (pfile->state.angled_headers)
666 start = BUFF_FRONT (pfile->u_buff);
668 /* An odd number of consecutive backslashes represents an escaped
670 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
673 return ((dest - temp) & 1) == 0;
676 /* Parses a string, character constant, or angle-bracketed header file
677 name. Handles embedded trigraphs and escaped newlines. The stored
678 string is guaranteed NUL-terminated, but it is not guaranteed that
679 this is the first NUL since embedded NULs are preserved.
681 When this function returns, buffer->cur points to the next
682 character to be processed. */
684 parse_string (pfile, token, terminator)
687 cppchar_t terminator;
689 cpp_buffer *buffer = pfile->buffer;
690 unsigned char *dest, *limit;
692 bool warned_nulls = false;
693 #ifdef MULTIBYTE_CHARS
698 dest = BUFF_FRONT (pfile->u_buff);
699 limit = BUFF_LIMIT (pfile->u_buff);
701 #ifdef MULTIBYTE_CHARS
702 /* Reset multibyte conversion state. */
703 (void) local_mbtowc (NULL, NULL, 0);
707 /* We need room for another char, possibly the terminating NUL. */
708 if ((size_t) (limit - dest) < 1)
710 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
711 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
712 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
713 limit = BUFF_LIMIT (pfile->u_buff);
716 #ifdef MULTIBYTE_CHARS
717 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
718 buffer->rlimit - buffer->cur);
721 cpp_error (pfile, DL_WARNING,
722 "ignoring invalid multibyte character");
728 buffer->cur += char_len;
735 /* Handle trigraphs, escaped newlines etc. */
736 if (c == '?' || c == '\\')
737 c = skip_escaped_newlines (pfile);
741 if (unescaped_terminator_p (pfile, dest))
744 else if (is_vspace (c))
746 /* No string literal may extend over multiple lines. In
747 assembly language, suppress the error except for <>
748 includes. This is a kludge around not knowing where
751 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
752 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
759 if (buffer->cur - 1 == buffer->rlimit)
764 cpp_error (pfile, DL_WARNING,
765 "null character(s) preserved in literal");
768 #ifdef MULTIBYTE_CHARS
771 for ( ; char_len > 0; --char_len)
772 *dest++ = (*buffer->cur - char_len);
781 token->val.str.text = BUFF_FRONT (pfile->u_buff);
782 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
783 BUFF_FRONT (pfile->u_buff) = dest + 1;
786 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
787 comment blocks (when executed with -C option) and
788 _asm (SDCPP specific) blocks */
790 /* Count and copy characters from src to dest, excluding CRs:
791 CRs are automatically generated, because the output is
792 opened in TEXT mode. If dest == NULL, only count chars */
794 copy_text_chars (dest, src, len)
802 for (p = src; p != src + len; ++p)
817 /* SDCC _asm specific */
818 /* The stored text includes the initial '_asm' and any terminator. */
820 save_asm (pfile, token, from)
823 const unsigned char *from;
825 #define _ASM_STR "_asm"
826 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
828 unsigned char *buffer;
829 unsigned int text_len, len;
831 len = pfile->buffer->cur - from;
832 /* + _ASM_LEN for the initial '_asm'. */
833 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
834 buffer = _cpp_unaligned_alloc (pfile, text_len);
837 token->type = CPP_ASM;
838 token->val.str.len = text_len;
839 token->val.str.text = buffer;
841 memcpy (buffer, _ASM_STR, _ASM_LEN);
842 copy_text_chars (buffer + _ASM_LEN, from, len);
845 /* The stored comment includes the comment start and any terminator. */
847 save_comment (pfile, token, from, type)
850 const unsigned char *from;
853 unsigned char *buffer;
854 unsigned int len, clen;
856 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
858 /* C++ comments probably (not definitely) have moved past a new
859 line, which we don't want to save in the comment. */
860 if (is_vspace (pfile->buffer->cur[-1]))
863 /* If we are currently in a directive, then we need to store all
864 C++ comments as C comments internally, and so we need to
865 allocate a little extra space in that case.
867 Note that the only time we encounter a directive here is
868 when we are saving comments in a "#define". */
869 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
871 buffer = _cpp_unaligned_alloc (pfile, clen);
873 token->type = CPP_COMMENT;
874 token->val.str.len = clen;
875 token->val.str.text = buffer;
878 copy_text_chars (buffer + 1, from, len);
880 /* Finish conversion to a C comment, if necessary. */
881 if (pfile->state.in_directive && type == '/')
884 buffer[clen - 2] = '*';
885 buffer[clen - 1] = '/';
889 /* Allocate COUNT tokens for RUN. */
891 _cpp_init_tokenrun (run, count)
895 run->base = xnewvec (cpp_token, count);
896 run->limit = run->base + count;
900 /* Returns the next tokenrun, or creates one if there is none. */
905 if (run->next == NULL)
907 run->next = xnew (tokenrun);
908 run->next->prev = run;
909 _cpp_init_tokenrun (run->next, 250);
915 /* Allocate a single token that is invalidated at the same time as the
916 rest of the tokens on the line. Has its line and col set to the
917 same as the last lexed token, so that diagnostics appear in the
920 _cpp_temp_token (pfile)
923 cpp_token *old, *result;
925 old = pfile->cur_token - 1;
926 if (pfile->cur_token == pfile->cur_run->limit)
928 pfile->cur_run = next_tokenrun (pfile->cur_run);
929 pfile->cur_token = pfile->cur_run->base;
932 result = pfile->cur_token++;
933 result->line = old->line;
934 result->col = old->col;
938 /* Lex a token into RESULT (external interface). Takes care of issues
939 like directive handling, token lookahead, multiple include
940 optimization and skipping. */
942 _cpp_lex_token (pfile)
949 if (pfile->cur_token == pfile->cur_run->limit)
951 pfile->cur_run = next_tokenrun (pfile->cur_run);
952 pfile->cur_token = pfile->cur_run->base;
955 if (pfile->lookaheads)
958 result = pfile->cur_token++;
961 result = _cpp_lex_direct (pfile);
963 if (result->flags & BOL)
965 /* Is this a directive?  If _cpp_handle_directive returns
966 false, it is an assembler #. */
967 if (result->type == CPP_HASH
968 /* 6.10.3 p 11: Directives in a list of macro arguments
969 gives undefined behavior. This implementation
970 handles the directive as normal. */
971 && pfile->state.parsing_args != 1
972 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
974 if (pfile->cb.line_change && !pfile->state.skipping)
975 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
978 /* We don't skip tokens in directives. */
979 if (pfile->state.in_directive)
982 /* Outside a directive, invalidate controlling macros. At file
983 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
984 get here and MI optimisation works. */
985 pfile->mi_valid = false;
987 if (!pfile->state.skipping || result->type == CPP_EOF)
994 /* A NUL terminates the current buffer. For ISO preprocessing this is
995 EOF, but for traditional preprocessing it indicates we need a line
996 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
997 to return a CPP_EOF to the caller. */
999 continue_after_nul (pfile)
1002 cpp_buffer *buffer = pfile->buffer;
1005 buffer->saved_flags = BOL;
1006 if (CPP_OPTION (pfile, traditional))
1008 if (pfile->state.in_directive)
1011 _cpp_remove_overlay (pfile);
1012 more = _cpp_read_logical_line_trad (pfile);
1013 _cpp_overlay_buffer (pfile, pfile->out.base,
1014 pfile->out.cur - pfile->out.base);
1015 pfile->line = pfile->out.first_line;
1019 /* Stop parsing arguments with a CPP_EOF. When we finally come
1020 back here, do the work of popping the buffer. */
1021 if (!pfile->state.parsing_args)
1023 if (buffer->cur != buffer->line_base)
1025 /* Non-empty files should end in a newline. Don't warn
1026 for command line and _Pragma buffers. */
1027 if (!buffer->from_stage3)
1028 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1029 handle_newline (pfile);
1032 /* Similarly, finish an in-progress directive with CPP_EOF
1033 before popping the buffer. */
1034 if (!pfile->state.in_directive && buffer->prev)
1036 more = !buffer->return_at_eof;
1037 _cpp_pop_buffer (pfile);
1045 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1047 if (get_effective_char (pfile) == CHAR) \
1048 result->type = THEN_TYPE; \
1052 result->type = ELSE_TYPE; \
1056 /* Lex a token into pfile->cur_token, which is also incremented, to
1057 get diagnostics pointing to the correct location.
1059 Does not handle issues such as token lookahead, multiple-include
1060 optimisation, directives, skipping etc. This function is only
1061 suitable for use by _cpp_lex_token, and in special cases like
1062 lex_expansion_token which doesn't care for any of these issues.
1064 When meeting a newline, returns CPP_EOF if parsing a directive,
1065 otherwise returns to the start of the token buffer if permissible.
1066 Returns the location of the lexed token. */
1068 _cpp_lex_direct (pfile)
1073 const unsigned char *comment_start;
1074 cpp_token *result = pfile->cur_token++;
1077 buffer = pfile->buffer;
1078 result->flags = buffer->saved_flags;
1079 buffer->saved_flags = 0;
1081 result->line = pfile->line;
1085 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1090 case ' ': case '\t': case '\f': case '\v': case '\0':
1091 result->flags |= PREV_WHITE;
1092 if (skip_whitespace (pfile, c))
1095 /* End of buffer. */
1097 if (continue_after_nul (pfile))
1099 result->type = CPP_EOF;
1102 case '\n': case '\r':
1103 handle_newline (pfile);
1104 buffer->saved_flags = BOL;
1105 if (! pfile->state.in_directive)
1107 if (pfile->state.parsing_args == 2)
1108 buffer->saved_flags |= PREV_WHITE;
1109 if (!pfile->keep_tokens)
1111 pfile->cur_run = &pfile->base_run;
1112 result = pfile->base_run.base;
1113 pfile->cur_token = result + 1;
1117 result->type = CPP_EOF;
1122 /* These could start an escaped newline, or '?' a trigraph. Let
1123 skip_escaped_newlines do all the work. */
1125 unsigned int line = pfile->line;
1127 c = skip_escaped_newlines (pfile);
1128 if (line != pfile->line)
1131 /* We had at least one escaped newline of some sort.
1132 Update the token's line and column. */
1133 goto update_tokens_line;
1137 /* We are either the original '?' or '\\', or a trigraph. */
1139 result->type = CPP_QUERY;
1146 case '0': case '1': case '2': case '3': case '4':
1147 case '5': case '6': case '7': case '8': case '9':
1148 result->type = CPP_NUMBER;
1149 parse_number (pfile, &result->val.str, 0);
1153 /* 'L' may introduce wide characters or strings. */
1155 const unsigned char *pos = buffer->cur;
1157 c = get_effective_char (pfile);
1158 if (c == '\'' || c == '"')
1160 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1161 parse_string (pfile, result, c);
1170 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1171 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1172 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1173 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1175 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1176 case 'G': case 'H': case 'I': case 'J': case 'K':
1177 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1178 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1180 result->type = CPP_NAME;
1181 result->val.node = parse_identifier (pfile);
1183 /* SDCC _asm specific */
1184 /* handle _asm ... _endasm ; */
1185 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1187 comment_start = buffer->cur;
1188 result->type = CPP_ASM;
1189 skip_asm_block (pfile);
1190 /* Save the _asm block as a token in its own right. */
1191 save_asm (pfile, result, comment_start);
1193 /* Convert named operators to their proper types. */
1194 else if (result->val.node->flags & NODE_OPERATOR)
1196 result->flags |= NAMED_OP;
1197 result->type = result->val.node->value.operator;
1203 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1204 parse_string (pfile, result, c);
1208 /* A potential block or line comment. */
1209 comment_start = buffer->cur;
1210 c = get_effective_char (pfile);
1214 if (skip_block_comment (pfile))
1215 cpp_error (pfile, DL_ERROR, "unterminated comment");
1217 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1218 || CPP_IN_SYSTEM_HEADER (pfile)))
1220 /* Warn about comments only if pedantically GNUC89, and not
1221 in system headers. */
1222 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1223 && ! buffer->warned_cplusplus_comments)
1225 cpp_error (pfile, DL_PEDWARN,
1226 "C++ style comments are not allowed in ISO C90");
1227 cpp_error (pfile, DL_PEDWARN,
1228 "(this will be reported only once per input file)");
1229 buffer->warned_cplusplus_comments = 1;
1232 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1233 cpp_error (pfile, DL_WARNING, "multi-line comment");
1237 result->type = CPP_DIV_EQ;
1243 result->type = CPP_DIV;
1247 if (!pfile->state.save_comments)
1249 result->flags |= PREV_WHITE;
1250 goto update_tokens_line;
1253 /* Save the comment as a token in its own right. */
1254 save_comment (pfile, result, comment_start, c);
1258 if (pfile->state.angled_headers)
1260 result->type = CPP_HEADER_NAME;
1261 parse_string (pfile, result, '>');
1265 c = get_effective_char (pfile);
1267 result->type = CPP_LESS_EQ;
1269 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1270 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1271 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1272 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1274 result->type = CPP_OPEN_SQUARE;
1275 result->flags |= DIGRAPH;
1277 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1279 result->type = CPP_OPEN_BRACE;
1280 result->flags |= DIGRAPH;
1285 result->type = CPP_LESS;
1290 c = get_effective_char (pfile);
1292 result->type = CPP_GREATER_EQ;
1294 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1295 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1296 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1300 result->type = CPP_GREATER;
1305 c = get_effective_char (pfile);
1307 result->type = CPP_MOD_EQ;
1308 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1310 result->flags |= DIGRAPH;
1311 result->type = CPP_HASH;
1312 if (get_effective_char (pfile) == '%')
1314 const unsigned char *pos = buffer->cur;
1316 if (get_effective_char (pfile) == ':')
1317 result->type = CPP_PASTE;
1319 buffer->cur = pos - 1;
1324 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1326 result->flags |= DIGRAPH;
1327 result->type = CPP_CLOSE_BRACE;
1332 result->type = CPP_MOD;
1337 result->type = CPP_DOT;
1338 c = get_effective_char (pfile);
1341 const unsigned char *pos = buffer->cur;
1343 if (get_effective_char (pfile) == '.')
1344 result->type = CPP_ELLIPSIS;
1346 buffer->cur = pos - 1;
1348 /* All known character sets have 0...9 contiguous. */
1349 else if (ISDIGIT (c))
1351 result->type = CPP_NUMBER;
1352 parse_number (pfile, &result->val.str, 1);
1354 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1355 result->type = CPP_DOT_STAR;
1361 c = get_effective_char (pfile);
1363 result->type = CPP_PLUS_PLUS;
1365 result->type = CPP_PLUS_EQ;
1369 result->type = CPP_PLUS;
1374 c = get_effective_char (pfile);
1377 result->type = CPP_DEREF;
1378 if (CPP_OPTION (pfile, cplusplus))
1380 if (get_effective_char (pfile) == '*')
1381 result->type = CPP_DEREF_STAR;
1387 result->type = CPP_MINUS_MINUS;
1389 result->type = CPP_MINUS_EQ;
1393 result->type = CPP_MINUS;
1398 c = get_effective_char (pfile);
1400 result->type = CPP_AND_AND;
1402 result->type = CPP_AND_EQ;
1406 result->type = CPP_AND;
1411 c = get_effective_char (pfile);
1413 result->type = CPP_OR_OR;
1415 result->type = CPP_OR_EQ;
1419 result->type = CPP_OR;
1424 c = get_effective_char (pfile);
1425 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1426 result->type = CPP_SCOPE;
1427 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1429 result->flags |= DIGRAPH;
1430 result->type = CPP_CLOSE_SQUARE;
1435 result->type = CPP_COLON;
1439 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1440 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1441 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1442 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1443 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1445 case '~': result->type = CPP_COMPL; break;
1446 case ',': result->type = CPP_COMMA; break;
1447 case '(': result->type = CPP_OPEN_PAREN; break;
1448 case ')': result->type = CPP_CLOSE_PAREN; break;
1449 case '[': result->type = CPP_OPEN_SQUARE; break;
1450 case ']': result->type = CPP_CLOSE_SQUARE; break;
1451 case '{': result->type = CPP_OPEN_BRACE; break;
1452 case '}': result->type = CPP_CLOSE_BRACE; break;
1453 case ';': result->type = CPP_SEMICOLON; break;
1455 /* @ is a punctuator in Objective-C. */
1456 case '@': result->type = CPP_ATSIGN; break;
1459 if (CPP_OPTION (pfile, dollars_in_ident))
1461 /* Fall through... */
1465 result->type = CPP_OTHER;
1473 /* An upper bound on the number of bytes needed to spell TOKEN,
1474 including preceding whitespace. */
1476 cpp_token_len (token)
1477 const cpp_token *token;
/* The spelling category of TOKEN decides where its length is read
   from. */
1481 switch (TOKEN_SPELL (token))
/* Any category not listed below contributes no bytes of its own. */
1483 default: len = 0; break;
/* Text-spelt tokens record their length in val.str.len. */
1485 case SPELL_STRING: len = token->val.str.len; break;
/* Identifiers take the length recorded on val.node. */
1486 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1488 /* 1 for whitespace, 4 for comment delimiters. */
/* NOTE(review): the statement that applies this 5-byte allowance to
   LEN lies outside the lines visible here - confirm. */
1492 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1493 already contain enough space to hold the token's spelling.
1494 Returns a pointer to the character after the last character written.
1497 cpp_spell_token (pfile, token, buffer)
1498 cpp_reader *pfile; /* Would be nice to be rid of this... */
1499 const cpp_token *token;
1500 unsigned char *buffer;
/* Dispatch on how this token type is spelt. */
1502 switch (TOKEN_SPELL (token))
1504 case SPELL_OPERATOR:
1506 const unsigned char *spelling;
/* A token flagged DIGRAPH is spelt from digraph_spellings, indexed by
   its offset from CPP_FIRST_DIGRAPH. The other source visible here is
   the name held in token_spellings. NOTE(review): the body of the
   NAMED_OP branch is not visible in this excerpt. */
1509 if (token->flags & DIGRAPH)
1511 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1512 else if (token->flags & NAMED_OP)
1515 spelling = TOKEN_NAME (token);
/* Walk the NUL-terminated spelling one character at a time. */
1517 while ((c = *spelling++) != '\0')
/* A single-character token keeps that character in val.c. */
1523 *buffer++ = token->val.c;
/* Copy the identifier bytes from val.node, then step past them. */
1528 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1529 buffer += NODE_LEN (token->val.node);
/* Copy the stored text unchanged, then step past it. */
1533 memcpy (buffer, token->val.str.text, token->val.str.len);
1534 buffer += token->val.str.len;
/* Quoted forms: the token type selects the delimiters and whether an
   L tag goes in front of them. */
1539 int left, right, tag;
1540 switch (token->type)
1542 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1543 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1544 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1545 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1546 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* NOTE(review): cpp_output_token also lists CPP_ASM in its
   corresponding switch; no such case is visible here - confirm
   whether a CPP_ASM token can reach this function. */
1548 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1549 TOKEN_NAME (token));
/* The tag, when nonzero, goes in front of the text. */
1552 if (tag) *buffer++ = tag;
1554 memcpy (buffer, token->val.str.text, token->val.str.len);
1555 buffer += token->val.str.len;
/* Reported at DL_ICE severity for a token that cannot be spelt. */
1561 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1568 /* Returns TOKEN spelt as a null-terminated string. The string is
1569 freed when the reader is destroyed. Useful for diagnostics. */
1571 cpp_token_as_text (pfile, token)
1573 const cpp_token *token;
/* cpp_token_len supplies an upper bound, so the storage can be
   obtained from _cpp_unaligned_alloc before anything is spelt. */
1575 unsigned int len = cpp_token_len (token);
1576 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* END receives the position cpp_spell_token returns. NOTE(review):
   the termination of the string and the return statement are not
   visible in this excerpt. */
1578 end = cpp_spell_token (pfile, token, start);
1584 /* Used by C front ends, which really should move to using
1585 cpp_token_as_text. */
1587 cpp_type2name (type)
1588 enum cpp_ttype type;
/* Returns the name field of the token_spellings entry for TYPE, cast
   to const char *. TYPE indexes the table directly; no range check
   is made in the visible code. */
1590 return (const char *) token_spellings[type].name;
1593 /* Writes the spelling of token to FP, without any preceding space.
1594 Separated from cpp_spell_token for efficiency - to avoid stdio
1595 double-buffering. */
1597 cpp_output_token (token, fp)
1598 const cpp_token *token;
/* Dispatch on how this token type is spelt. */
1601 switch (TOKEN_SPELL (token))
1603 case SPELL_OPERATOR:
1605 const unsigned char *spelling;
/* A token flagged DIGRAPH is spelt from digraph_spellings, indexed by
   its offset from CPP_FIRST_DIGRAPH. The other source visible here is
   the name held in token_spellings. NOTE(review): the body of the
   NAMED_OP branch is not visible in this excerpt. */
1608 if (token->flags & DIGRAPH)
1610 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1611 else if (token->flags & NAMED_OP)
1614 spelling = TOKEN_NAME (token);
/* Loop tail: the pointer is advanced before the test, so this
   condition never examines the character SPELLING started at.
   NOTE(review): the loop body is not visible in this excerpt. */
1619 while ((c = *++spelling) != '\0');
/* A single-character token keeps that character in val.c. */
1624 putc (token->val.c, fp);
/* Write the identifier bytes held on val.node. */
1629 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Write the stored text unchanged. */
1633 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* Quoted forms: the token type selects the delimiters and the tag.
   A zero LEFT, RIGHT or TAG writes nothing for that position, which
   is how a CPP_ASM token comes out with its text only. */
1638 int left, right, tag;
1639 switch (token->type)
1641 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1642 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1643 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1644 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1645 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1646 case CPP_ASM: left = '\0'; right = '\0'; tag = '\0'; break;
/* This diagnostic is written straight to stderr; the parameter list
   of this function has no cpp_reader to hand to cpp_error. */
1648 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1651 if (tag) putc (tag, fp);
1652 if (left) putc (left, fp);
1653 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1654 if (right) putc (right, fp);
1659 /* An error, most probably. */
1664 /* Compare two tokens. */
1666 _cpp_equiv_tokens (a, b)
1667 const cpp_token *a, *b;
/* Values are compared only once type and flags both match; the
   spelling category of A then picks the comparison. */
1669 if (a->type == b->type && a->flags == b->flags)
1670 switch (TOKEN_SPELL (a))
1672 default: /* Keep compiler happy. */
1673 case SPELL_OPERATOR:
/* Compare the character stored in val.c. */
1676 return a->val.c == b->val.c; /* Character. */
/* Only a CPP_MACRO_ARG has a further value to compare here: its
   argument number. */
1678 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Identifiers match when both tokens hold the same node pointer. */
1680 return a->val.node == b->val.node;
/* Text-spelt tokens need equal length and, by memcmp, equal bytes. */
1683 return (a->val.str.len == b->val.str.len
1684 && !memcmp (a->val.str.text, b->val.str.text,
1691 /* Returns nonzero if a space should be inserted to avoid an
1692 accidental token paste for output. For simplicity, it is
1693 conservative, and occasionally advises a space where one is not
1694 needed, e.g. "." and ".2". */
1696 cpp_avoid_paste (pfile, token1, token2)
1698 const cpp_token *token1, *token2;
/* A and B start out as the types of the two tokens. */
1700 enum cpp_ttype a = token1->type, b = token2->type;
/* NOTE(review): the statements controlled by the two NAMED_OP tests
   below are not visible in this excerpt. */
1703 if (token1->flags & NAMED_OP)
1705 if (token2->flags & NAMED_OP)
/* C is set to the first character of the spelling of TOKEN2 when
   that token is a digraph or an operator. */
1709 if (token2->flags & DIGRAPH)
1710 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1711 else if (token_spellings[b].category == SPELL_OPERATOR)
1712 c = token_spellings[b].name[0];
1714 /* Quickly get everything that can paste with an '='. */
1715 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each entry below answers for one type of TOKEN1: it is nonzero when
   TOKEN2 begins with a character, or is of a type, that could run
   together with it. Several entries exist because of the digraph
   spellings, e.g. a colon or > after % would read as %: or %>. */
1720 case CPP_GREATER: return c == '>' || c == '?';
1721 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1722 case CPP_PLUS: return c == '+';
1723 case CPP_MINUS: return c == '-' || c == '>';
1724 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1725 case CPP_MOD: return c == ':' || c == '>';
1726 case CPP_AND: return c == '&';
1727 case CPP_OR: return c == '|';
1728 case CPP_COLON: return c == ':' || c == '>';
1729 case CPP_DEREF: return c == '*';
1730 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1731 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1732 case CPP_NAME: return ((b == CPP_NUMBER
1733 && name_p (pfile, &token2->val.str))
1735 || b == CPP_CHAR || b == CPP_STRING); /* L */
1736 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1737 || c == '.' || c == '+' || c == '-');
/* An @ held in a CPP_OTHER token matters only when the objc option
   is on, and then only before a name or a string. */
1738 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1739 && token1->val.c == '@'
1740 && (b == CPP_NAME || b == CPP_STRING));
1747 /* Output all the remaining tokens on the current line, and a newline
1748 character, to FP. Leading whitespace is removed. If there are
1749 macros, special token padding is not performed. */
1751 cpp_output_line (pfile, fp)
1755 const cpp_token *token;
/* Prime the loop with the first token; it is written without its
   PREV_WHITE flag ever being consulted. */
1757 token = cpp_get_token (pfile);
1758 while (token->type != CPP_EOF)
1760 cpp_output_token (token, fp);
/* The following token is fetched before the spacing test, so the
   PREV_WHITE flag examined below belongs to the token about to be
   written, not to the one just written. */
1761 token = cpp_get_token (pfile);
1762 if (token->flags & PREV_WHITE)
1769 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): only the delegation to hex_value is visible in this
   excerpt; what happens when C is not a hex digit cannot be told
   from here - confirm against the full definition. */
1775 return hex_value (c);
1780 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1781 failure if cpplib is not parsing C++ or C99. Such failure is
1782 silent, and no variables are updated. Otherwise returns 0, and
1783 warns if -Wtraditional.
1785 [lex.charset]: The character designated by the universal character
1786 name \UNNNNNNNN is that character whose character short name in
1787 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1788 universal character name \uNNNN is that character whose character
1789 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1790 for a universal character name is less than 0x20 or in the range
1791 0x7F-0x9F (inclusive), or if the universal character name
1792 designates a character in the basic source character set, then the
1793 program is ill-formed.
1795 We assume that wchar_t is Unicode, so we don't need to do any
1796 mapping. Is this ever wrong?
1798 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1799 LIMIT is the end of the string or charconst. PSTR is updated to
1800 point after the UCS on return, and the UCS is written into PC. */
1803 maybe_read_ucs (pfile, pstr, limit, pc)
1805 const unsigned char **pstr;
1806 const unsigned char *limit;
/* P is a working copy of the read position; C starts as the letter
   PC points at and is reused for each digit read. */
1809 const unsigned char *p = *pstr;
1810 unsigned int code = 0;
1811 unsigned int c = *pc, length;
1813 /* Only attempt to interpret a UCS for C++ and C99. */
1814 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1817 if (CPP_WTRADITIONAL (pfile))
1818 cpp_error (pfile, DL_WARNING,
1819 "the meaning of '\\%c' is different in traditional C", c);
/* A lower-case u is followed by 4 hex digits; any other letter
   reaching this point is given 8. */
1821 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT. */
1823 if ((size_t) (limit - p) < length)
1825 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1826 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down once per iteration; the length == 0 test further
   down relies on that to tell whether every digit was consumed. */
1831 for (; length; length--, p++)
/* Each hex digit contributes four more bits to CODE. */
1835 code = (code << 4) + hex_digit_value (c);
1838 cpp_error (pfile, DL_ERROR,
1839 "non-hex digit '%c' in universal-character-name", c);
1840 /* We shouldn't skip in case there are multibyte chars. */
1846 #ifdef TARGET_EBCDIC
1847 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1848 code = 0x3f; /* EBCDIC invalid character */
1850 /* True extended characters are OK. */
/* Codes with bit 31 set, and codes in 0xD800-0xDFFF, are kept out of
   the accepted set. NOTE(review): the first operand of this
   condition is not visible in this excerpt. */
1852 && !(code & 0x80000000)
1853 && !(code >= 0xD800 && code <= 0xDFFF))
1855 /* The standard permits $, @ and ` to be specified as UCNs. We use
1856 hex escapes so that this also works with EBCDIC hosts. */
1857 else if (code == 0x24 || code == 0x40 || code == 0x60)
1859 /* Don't give another error if one occurred above. */
1860 else if (length == 0)
1861 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1869 /* Returns the value of an escape sequence, truncated to the correct
1870 target precision. PSTR points to the input pointer, which is just
1871 after the backslash. LIMIT is how much text we have. WIDE is true
1872 if the escape sequence is part of a wide character constant or
1873 string literal. Handles all relevant diagnostics. */
1875 cpp_parse_escape (pfile, pstr, limit, wide)
1877 const unsigned char **pstr;
1878 const unsigned char *limit;
1882 const unsigned char *str = *pstr;
/* WIDTH comes from one of two options, wchar_precision or
   char_precision. MASK keeps the low WIDTH bits; the shift is made
   only when WIDTH is narrower than cppchar_t, which keeps the shift
   count in range. */
1887 width = CPP_OPTION (pfile, wchar_precision);
1889 width = CPP_OPTION (pfile, char_precision);
1890 if (width < BITS_PER_CPPCHAR_T)
1891 mask = ((cppchar_t) 1 << width) - 1;
/* These four escapes stand for the character itself, so C is left
   as read. */
1898 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map onto the TARGET_* character values. */
1899 case 'b': c = TARGET_BS; break;
1900 case 'f': c = TARGET_FF; break;
1901 case 'n': c = TARGET_NEWLINE; break;
1902 case 'r': c = TARGET_CR; break;
1903 case 't': c = TARGET_TAB; break;
1904 case 'v': c = TARGET_VT; break;
1906 case '(': case '{': case '[': case '%':
1907 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1908 '\%' is used to prevent SCCS from getting confused. */
1909 unknown = CPP_PEDANTIC (pfile);
1913 if (CPP_WTRADITIONAL (pfile))
1914 cpp_error (pfile, DL_WARNING,
1915 "the meaning of '\\a' is different in traditional C");
1920 if (CPP_PEDANTIC (pfile))
1921 cpp_error (pfile, DL_PEDWARN,
1922 "non-ISO-standard escape sequence, '\\%c'", (int) c);
/* maybe_read_ucs returns nonzero when it declines to interpret the
   sequence, which is then treated as an unknown escape. */
1927 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1931 if (CPP_WTRADITIONAL (pfile))
1932 cpp_error (pfile, DL_WARNING,
1933 "the meaning of '\\x' is different in traditional C");
1936 cppchar_t i = 0, overflow = 0;
1937 int digits_found = 0;
/* Shifting I left then right by four drops its top four bits, so the
   XOR is nonzero exactly when the coming shift would lose set bits
   (assuming cppchar_t is an unsigned type - confirm). */
1945 overflow |= i ^ (i << 4 >> 4);
1946 i = (i << 4) + hex_digit_value (c);
1951 cpp_error (pfile, DL_ERROR,
1952 "\\x used with no following hex digits");
/* Either lost high bits or a value wider than MASK is out of range. */
1954 if (overflow | (i != (i & mask)))
1956 cpp_error (pfile, DL_PEDWARN,
1957 "hex escape sequence out of range");
1964 case '0': case '1': case '2': case '3':
1965 case '4': case '5': case '6': case '7':
/* I starts from the octal digit already consumed. */
1968 cppchar_t i = c - '0';
/* NOTE(review): COUNT is initialised outside the visible lines; if it
   starts at 0 this loop admits at most two further digits. */
1970 while (str < limit && ++count < 3)
1973 if (c < '0' || c > '7')
1976 i = (i << 3) + c - '0';
1979 if (i != (i & mask))
1981 cpp_error (pfile, DL_PEDWARN,
1982 "octal escape sequence out of range");
/* Two renderings of an unknown escape: the character itself, or its
   value as three octal digits. NOTE(review): the test choosing
   between them is not visible in this excerpt. */
1997 cpp_error (pfile, DL_PEDWARN,
1998 "unknown escape sequence '\\%c'", (int) c);
2000 cpp_error (pfile, DL_PEDWARN,
2001 "unknown escape sequence: '\\%03o'", (int) c);
2006 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2014 /* Interpret a (possibly wide) character constant in TOKEN.
2015 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
2016 points to a variable that is filled in with the number of
2017 characters seen, and UNSIGNEDP to a variable that indicates whether
2018 the result has signed type. */
2020 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2022 const cpp_token *token;
2023 unsigned int *pchars_seen;
/* STR and LIMIT bracket the text stored in the token. */
2026 const unsigned char *str = token->val.str.text;
2027 const unsigned char *limit = str + token->val.str.len;
2028 unsigned int chars_seen = 0;
2029 size_t width, max_chars;
2030 cppchar_t c, mask, result = 0;
2033 #ifdef MULTIBYTE_CHARS
/* NOTE(review): presumably resets the multibyte conversion state
   before scanning - confirm against local_mbtowc. */
2034 (void) local_mbtowc (NULL, NULL, 0);
2037 /* Width in bits. */
2038 if (token->type == CPP_CHAR)
/* For a narrow constant MAX_CHARS is the number of characters of
   WIDTH bits that fit in int_precision bits. */
2040 width = CPP_OPTION (pfile, char_precision);
2041 max_chars = CPP_OPTION (pfile, int_precision) / width;
2042 unsigned_p = CPP_OPTION (pfile, unsigned_char);
2046 width = CPP_OPTION (pfile, wchar_precision);
2048 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
/* MASK keeps the low WIDTH bits; the shift is made only when WIDTH is
   narrower than cppchar_t. */
2051 if (width < BITS_PER_CPPCHAR_T)
2052 mask = ((cppchar_t) 1 << width) - 1;
2058 #ifdef MULTIBYTE_CHARS
2062 char_len = local_mbtowc (&wc, str, limit - str);
2065 cpp_error (pfile, DL_WARNING,
2066 "ignoring invalid multibyte character");
/* An escape sequence is handed to cpp_parse_escape, which is told
   whether the constant is wide. */
2079 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2081 #ifdef MAP_CHARACTER
2083 c = MAP_CHARACTER (c);
2088 /* Truncate the character, scale the result and merge the two. */
/* Earlier characters move up by WIDTH bits and the new one takes the
   low bits. */
2090 if (width < BITS_PER_CPPCHAR_T)
2091 result = (result << width) | c;
2096 if (chars_seen == 0)
2097 cpp_error (pfile, DL_ERROR, "empty character constant");
2098 else if (chars_seen > 1)
2100 /* Multichar charconsts are of type int and therefore signed. */
2103 if (chars_seen > max_chars)
/* The count reported to the caller is clamped to MAX_CHARS. */
2105 chars_seen = max_chars;
2106 cpp_error (pfile, DL_WARNING,
2107 "character constant too long for its type");
2109 else if (CPP_OPTION (pfile, warn_multichar))
2110 cpp_error (pfile, DL_WARNING, "multi-character character constant");
2113 /* Sign-extend or truncate the constant to cppchar_t. The value is
2114 in WIDTH bits, but for multi-char charconsts its value is the
2115 full target type's width. */
2118 if (width < BITS_PER_CPPCHAR_T)
2120 mask = ((cppchar_t) 1 << width) - 1;
/* NOTE(review): the sign-bit test shifts a plain int 1, unlike the
   (cppchar_t) 1 used for MASK on the line above - confirm WIDTH - 1
   can never reach the width of int. */
2121 if (unsigned_p || !(result & (1 << (width - 1))))
/* Both out-parameters are written unconditionally. */
2127 *pchars_seen = chars_seen;
2128 *unsignedp = unsigned_p;
2132 /* Memory buffers. Changing these three constants can have a dramatic
2133 effect on performance. The values here are reasonable defaults,
2134 but might be tuned. If you adjust them, be sure to test across a
2135 range of uses of cpplib, including heavy nested function-like macro
2136 expansion. Also check the change in peak memory usage (NJAMD is a
2137 good tool for this). */
/* MIN_BUFF_SIZE is the floor applied to a requested buffer length. */
2138 #define MIN_BUFF_SIZE 8000
/* BUFF_SIZE_UPPER_BOUND is the largest free buffer considered
   acceptable for a request of MIN_SIZE bytes: MIN_BUFF_SIZE plus one
   and a half times the request. */
2139 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* EXTENDED_BUFF_SIZE is the size to request when extending BUFF:
   MIN_EXTRA plus twice the span from cur to limit.
   NOTE(review): MIN_EXTRA is not parenthesised in the expansion,
   unlike BUFF and MIN_SIZE above; an argument containing an operator
   of lower precedence than + would be grouped differently. */
2140 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2141 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Build-time check that the upper bound for an empty request is not
   below MIN_BUFF_SIZE. */
2143 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2144 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2147 /* Create a new allocation buffer. Place the control block at the end
2148 of the buffer, so that buffer overflows will cause immediate chaos. */
2154 unsigned char *base;
/* Requests below MIN_BUFF_SIZE are raised to it, then the length is
   passed through CPP_ALIGN. */
2156 if (len < MIN_BUFF_SIZE)
2157 len = MIN_BUFF_SIZE;
2158 len = CPP_ALIGN (len);
/* One allocation holds LEN data bytes followed by the _cpp_buff
   control block, so RESULT sits exactly at BASE + LEN. */
2160 base = xmalloc (len + sizeof (_cpp_buff));
2161 result = (_cpp_buff *) (base + len);
2162 result->base = base;
/* LIMIT therefore has the same address as the control block. */
2164 result->limit = base + len;
2165 result->next = NULL;
2169 /* Place a chain of unwanted allocation buffers on the free list. */
2171 _cpp_release_buff (pfile, buff)
2175 _cpp_buff *end = buff;
/* Splice the chain in at the head of pfile->free_buffs: END is linked
   to the old head and BUFF becomes the new head. NOTE(review): any
   code that moves END along the chain before this point is not
   visible in this excerpt. */
2179 end->next = pfile->free_buffs;
2180 pfile->free_buffs = buff;
2183 /* Return a free buffer of size at least MIN_SIZE. */
2185 _cpp_get_buff (pfile, min_size)
/* The search walks pfile->free_buffs and accepts a buffer whose
   capacity lies between MIN_SIZE and BUFF_SIZE_UPPER_BOUND (MIN_SIZE).
   The buffer handed back has its next link cleared and its cur
   pointer reset to base. */
2189 _cpp_buff *result, **p;
/* P steps through the links of the free list (the address of each
   next field) rather than through the buffers themselves. */
2191 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* Fall back to a fresh allocation. NOTE(review): the condition
   guarding this return is not visible in this excerpt. */
2196 return new_buff (min_size);
/* Capacity of the candidate, measured from base to limit. */
2198 size = result->limit - result->base;
2199 /* Return a buffer that's big enough, but don't waste one that's
2201 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2206 result->next = NULL;
2207 result->cur = result->base;
2211 /* Creates a new buffer with enough space to hold the uncommitted
2212 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2213 the excess bytes to the new buffer. Chains the new buffer after
2214 BUFF, and returns the new buffer. */
2216 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The request is sized by EXTENDED_BUFF_SIZE and served by
   _cpp_get_buff. */
2221 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2222 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes
   starting at buff->cur to the base of the new buffer. */
2224 buff->next = new_buff;
2225 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2229 /* Creates a new buffer with enough space to hold the uncommitted
2230 remaining bytes of the buffer pointed to by BUFF, and at least
2231 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2232 Chains the new buffer before the buffer pointed to by BUFF, and
2233 updates the pointer to point to the new buffer. */
2235 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer PBUFF currently designates; the request is
   sized from it by EXTENDED_BUFF_SIZE. */
2240 _cpp_buff *new_buff, *old_buff = *pbuff;
2241 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Copy BUFF_ROOM (old_buff) bytes starting at old_buff->cur to the
   base of the new buffer, then put the new buffer in front of the
   old one. */
2243 new_buff = _cpp_get_buff (pfile, size);
2244 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2245 new_buff->next = old_buff;
2249 /* Free a chain of buffers starting at BUFF. */
2251 _cpp_free_buff (buff)
/* The loop advances to NEXT, so NEXT has to be captured inside the
   body before the current buffer goes away. NOTE(review): the loop
   body is not visible in this excerpt. */
2256 for (; buff; buff = next)
2263 /* Allocate permanent, unaligned storage of length LEN. */
2265 _cpp_unaligned_alloc (pfile, len)
/* Allocation is carved from the buffer at the head of pfile->u_buff,
   starting at its cur pointer. */
2269 _cpp_buff *buff = pfile->u_buff;
2270 unsigned char *result = buff->cur;
/* Not enough room between cur and limit: get a buffer of at least
   LEN bytes and push it on the front of the u_buff chain. */
2272 if (len > (size_t) (buff->limit - result))
2274 buff = _cpp_get_buff (pfile, len);
2275 buff->next = pfile->u_buff;
2276 pfile->u_buff = buff;
/* Reserve LEN bytes by advancing the cur pointer of the buffer. */
2280 buff->cur = result + len;
2284 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2285 That buffer is used for growing allocations when saving macro
2286 replacement lists in a #define, and when parsing an answer to an
2287 assertion in #assert, #unassert or #if (and therefore possibly
2288 whilst expanding macros). It therefore must not be used by any
2289 code that they might call: specifically the lexer and the guts of the macro expander.
2292 All existing other uses clearly fit this restriction: storing
2293 registered pragmas during initialization. */
2295 _cpp_aligned_alloc (pfile, len)
2299 _cpp_buff *buff = pfile->a_buff;
2300 unsigned char *result = buff->cur;
2302 if (len > (size_t) (buff->limit - result))
2304 buff = _cpp_get_buff (pfile, len);
2305 buff->next = pfile->a_buff;
2306 pfile->a_buff = buff;
2310 buff->cur = result + len;