1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #ifdef MULTIBYTE_CHARS
34 /* Tokens with SPELL_STRING store their spelling in the token list,
35 and its length in the token->val.name.len. */
48 enum spell_type category;
49 const unsigned char *name;
52 static const unsigned char *const digraph_spellings[] =
53 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
55 #define OP(e, s) { SPELL_OPERATOR, U s },
56 #define TK(e, s) { s, U STRINGX (e) },
57 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
61 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
62 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
63 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
65 static void handle_newline PARAMS ((cpp_reader *));
66 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
67 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
69 static int skip_asm_block PARAMS ((cpp_reader *));
70 static int skip_block_comment PARAMS ((cpp_reader *));
71 static int skip_line_comment PARAMS ((cpp_reader *));
72 static void adjust_column PARAMS ((cpp_reader *));
73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
77 static void pedantic_parse_number PARAMS ((cpp_reader *, cpp_string *, int));
78 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
79 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
80 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
81 static bool trigraph_p PARAMS ((cpp_reader *));
82 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
83 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
84 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
86 static bool continue_after_nul PARAMS ((cpp_reader *));
87 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
88 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
89 const unsigned char *, cppchar_t *));
90 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
92 static unsigned int hex_digit_value PARAMS ((unsigned int));
93 static _cpp_buff *new_buff PARAMS ((size_t));
97 /* Compares the token TOKEN to the NUL-terminated string STRING.
98 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
100 cpp_ideq (token, string)
101 const cpp_token *token;
104 if (token->type != CPP_NAME)
107 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
110 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
111 Returns with buffer->cur pointing to the character immediately
112 following the newline (combination). */
114 handle_newline (pfile)
117 cpp_buffer *buffer = pfile->buffer;
119 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
120 only accept CR-LF; maybe we should fall back to that behavior? */
121 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
124 buffer->line_base = buffer->cur;
125 buffer->col_adjust = 0;
129 /* Subroutine of skip_escaped_newlines; called when a 3-character
130 sequence beginning with "??" is encountered. buffer->cur points to
the second '?', so buffer->cur[1] is the character that selects the trigraph.
133 Warn if necessary, and return true if the sequence forms a
134 trigraph and the trigraph should be honored. */
139 cpp_buffer *buffer = pfile->buffer;
140 cppchar_t from_char = buffer->cur[1];
143 if (!_cpp_trigraph_map[from_char])
146 accept = CPP_OPTION (pfile, trigraphs);
148 /* Don't warn about trigraphs in comments. */
149 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
152 cpp_error_with_line (pfile, DL_WARNING,
153 pfile->line, CPP_BUF_COL (buffer) - 1,
154 "trigraph ??%c converted to %c",
156 (int) _cpp_trigraph_map[from_char]);
157 else if (buffer->cur != buffer->last_Wtrigraphs)
159 buffer->last_Wtrigraphs = buffer->cur;
160 cpp_error_with_line (pfile, DL_WARNING,
161 pfile->line, CPP_BUF_COL (buffer) - 1,
162 "trigraph ??%c ignored", (int) from_char);
169 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
170 lie in buffer->cur[-1]. Returns the next byte, which will be in
171 buffer->cur[-1]. This routine performs preprocessing stages 1 and
172 2 of the ISO C standard. */
174 skip_escaped_newlines (pfile)
177 cpp_buffer *buffer = pfile->buffer;
178 cppchar_t next = buffer->cur[-1];
180 /* Only do this if we apply stages 1 and 2. */
181 if (!buffer->from_stage3)
183 const unsigned char *saved_cur;
190 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
193 /* Translate the trigraph. */
194 next = _cpp_trigraph_map[buffer->cur[1]];
200 if (buffer->cur == buffer->rlimit)
203 /* We have a backslash, and room for at least one more
204 character. Skip horizontal whitespace. */
205 saved_cur = buffer->cur;
207 next1 = *buffer->cur++;
208 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
210 if (!is_vspace (next1))
212 buffer->cur = saved_cur;
216 if (saved_cur != buffer->cur - 1
217 && !pfile->state.lexing_comment)
218 cpp_error (pfile, DL_WARNING,
219 "backslash and newline separated by space");
221 handle_newline (pfile);
222 buffer->backup_to = buffer->cur;
223 if (buffer->cur == buffer->rlimit)
225 cpp_error (pfile, DL_PEDWARN,
226 "backslash-newline at end of file");
230 next = *buffer->cur++;
232 while (next == '\\' || next == '?');
238 /* Obtain the next character, after trigraph conversion and skipping
239 an arbitrarily long string of escaped newlines. The common case of
240 no trigraphs or escaped newlines falls through quickly. On return,
241 buffer->backup_to points to where to return to if the character is
242 not to be processed. */
244 get_effective_char (pfile)
248 cpp_buffer *buffer = pfile->buffer;
250 buffer->backup_to = buffer->cur;
251 next = *buffer->cur++;
252 if (__builtin_expect (next == '?' || next == '\\', 0))
253 next = skip_escaped_newlines (pfile);
258 /* SDCC _asm specific */
259 /* Skip an _asm ... _endasm block. We find the end of the block by
260 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero otherwise. */
263 skip_asm_block (pfile)
266 #define _ENDASM_STR "endasm"
267 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
269 cpp_buffer *buffer = pfile->buffer;
274 pfile->state.lexing_comment = 1;
275 while (buffer->cur != buffer->rlimit)
277 prev_space = is_space(c);
280 /* FIXME: For speed, create a new character class of characters
281 of interest inside block comments. */
282 if (c == '?' || c == '\\')
283 c = skip_escaped_newlines (pfile);
285 if (prev_space && c == '_')
287 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
288 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
290 buffer->cur += _ENDASM_LEN;
295 else if (is_vspace (c))
297 prev_space = is_space(c);
298 handle_newline (pfile);
301 adjust_column (pfile);
304 pfile->state.lexing_comment = 0;
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 nonzero if comment terminated by EOF, zero otherwise. */
312 skip_block_comment (pfile)
315 cpp_buffer *buffer = pfile->buffer;
316 cppchar_t c = EOF, prevc = EOF;
318 pfile->state.lexing_comment = 1;
319 while (buffer->cur != buffer->rlimit)
321 prevc = c, c = *buffer->cur++;
323 /* FIXME: For speed, create a new character class of characters
324 of interest inside block comments. */
325 if (c == '?' || c == '\\')
326 c = skip_escaped_newlines (pfile);
328 /* People like decorating comments with '*', so check for '/'
329 instead for efficiency. */
335 /* Warn about potential nested comments, but not if the '/'
336 comes immediately before the true comment delimiter.
337 Don't bother to get it right across escaped newlines. */
338 if (CPP_OPTION (pfile, warn_comments)
339 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
340 cpp_error_with_line (pfile, DL_WARNING,
341 pfile->line, CPP_BUF_COL (buffer),
342 "\"/*\" within comment");
344 else if (is_vspace (c))
345 handle_newline (pfile);
347 adjust_column (pfile);
350 pfile->state.lexing_comment = 0;
351 return c != '/' || prevc != '*';
354 /* Skip a C++ line comment, leaving buffer->cur pointing to the
355 terminating newline. Handles escaped newlines. Returns nonzero
356 if a multiline comment. */
358 skip_line_comment (pfile)
361 cpp_buffer *buffer = pfile->buffer;
362 unsigned int orig_line = pfile->line;
364 #ifdef MULTIBYTE_CHARS
369 pfile->state.lexing_comment = 1;
370 #ifdef MULTIBYTE_CHARS
371 /* Reset multibyte conversion state. */
372 (void) local_mbtowc (NULL, NULL, 0);
376 if (buffer->cur == buffer->rlimit)
379 #ifdef MULTIBYTE_CHARS
380 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
381 buffer->rlimit - buffer->cur);
384 cpp_error (pfile, DL_WARNING,
385 "ignoring invalid multibyte character");
391 buffer->cur += char_len;
397 if (c == '?' || c == '\\')
398 c = skip_escaped_newlines (pfile);
400 while (!is_vspace (c));
402 /* Step back over the newline, except at EOF. */
406 pfile->state.lexing_comment = 0;
407 return orig_line != pfile->line;
410 /* pfile->buffer->cur is one beyond the \t character. Update
411 col_adjust so we track the column correctly. */
413 adjust_column (pfile)
416 cpp_buffer *buffer = pfile->buffer;
417 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
419 /* Round it up to multiple of the tabstop, but subtract 1 since the
420 tab itself occupies a character position. */
421 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
422 - col % CPP_OPTION (pfile, tabstop)) - 1;
425 /* Skips whitespace, saving the next non-whitespace character.
426 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
427 tokens might be assigned an incorrect column. */
429 skip_whitespace (pfile, c)
433 cpp_buffer *buffer = pfile->buffer;
434 unsigned int warned = 0;
438 /* Horizontal space always OK. */
442 adjust_column (pfile);
443 /* Just \f \v or \0 left. */
446 if (buffer->cur - 1 == buffer->rlimit)
450 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
454 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
455 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
456 CPP_BUF_COL (buffer),
457 "%s in preprocessing directive",
458 c == '\f' ? "form feed" : "vertical tab");
462 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
463 while (is_nvspace (c));
469 /* See if the characters of a number token are all valid in a name, i.e. every character satisfies is_idchar. */
472 name_p (pfile, string)
474 const cpp_string *string;
478 for (i = 0; i < string->len; i++)
479 if (!is_idchar (string->text[i]))
485 /* Parse an identifier, skipping embedded backslash-newlines. This is
486 a critical inner loop. The common case is an identifier which has
487 not been split by backslash-newline, does not contain a dollar
488 sign, and has already been scanned (roughly 10:1 ratio of
489 seen:unseen identifiers in normal code; the distribution is
490 Poisson-like). Second most common case is a new identifier, not
491 split and no dollar sign. The other possibilities are rare and
492 have been relegated to parse_slow. */
493 static cpp_hashnode *
494 parse_identifier (pfile)
497 cpp_hashnode *result;
498 const uchar *cur, *base;
500 /* Fast-path loop. Skim over a normal identifier.
501 N.B. ISIDNUM does not include $. */
502 cur = pfile->buffer->cur;
503 while (ISIDNUM (*cur))
506 /* Check for slow-path cases. */
507 if (*cur == '?' || *cur == '\\' || *cur == '$')
511 base = parse_slow (pfile, cur, 0, &len);
512 result = (cpp_hashnode *)
513 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
517 base = pfile->buffer->cur - 1;
518 pfile->buffer->cur = cur;
519 result = (cpp_hashnode *)
520 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
523 /* Rarely, identifiers require diagnostics when lexed.
524 XXX Has to be forced out of the fast path. */
525 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
526 && !pfile->state.skipping, 0))
528 /* It is allowed to poison the same identifier twice. */
529 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
530 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
533 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
534 replacement list of a variadic macro. */
535 if (result == pfile->spec_nodes.n__VA_ARGS__
536 && !pfile->state.va_args_ok)
537 cpp_error (pfile, DL_PEDWARN,
538 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
544 /* Slow path. This handles numbers and identifiers which have been
545 split, or contain dollar signs. The part of the token from
546 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
547 0 for an identifier, 1 for a number, and 2 for a number with a leading
548 period. Returns a pointer to the token's NUL-terminated spelling in permanent
549 storage, and sets PLEN to its length (not counting the terminating NUL). */
551 parse_slow (pfile, cur, number_p, plen)
557 cpp_buffer *buffer = pfile->buffer;
558 const uchar *base = buffer->cur - 1;
559 struct obstack *stack = &pfile->hash_table->stack;
560 unsigned int c, prevc, saw_dollar = 0;
562 /* Place any leading period. */
564 obstack_1grow (stack, '.');
566 /* Copy the part of the token which is known to be okay. */
567 obstack_grow (stack, base, cur - base);
569 /* Now process the part which isn't. We are looking at one of
570 '$', '\\', or '?' on entry to this loop. */
576 /* Potential escaped newline? */
577 buffer->backup_to = buffer->cur - 1;
578 if (c == '?' || c == '\\')
579 c = skip_escaped_newlines (pfile);
585 if (c != '.' && !VALID_SIGN (c, prevc))
589 /* Handle normal identifier characters in this loop. */
593 obstack_1grow (stack, c);
600 while (is_idchar (c));
603 /* Step back over the unwanted char. */
606 /* $ is not an identifier character in the standard, but is commonly
607 accepted as an extension. Don't warn about it in skipped
608 conditional blocks. */
609 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
610 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
612 /* Identifiers and numbers are null-terminated. */
613 *plen = obstack_object_size (stack);
614 obstack_1grow (stack, '\0');
615 return obstack_finish (stack);
619 /* Pedantically parse a number whose first character is at
620 pfile->buffer->cur[-1], skipping embedded backslash-newlines. LEADING_PERIOD
621 is nonzero if there was a "." before that character. Place the result in NUMBER. */
623 pedantic_parse_number (pfile, number, leading_period)
628 enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
629 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
631 uchar c = *(pfile->buffer->cur - 1);
632 struct obstack *stack = &pfile->hash_table->stack;
633 cpp_buffer *buffer = pfile->buffer;
642 obstack_1grow (stack, '.');
643 c = get_effective_char(pfile);
651 obstack_1grow (stack, c);
652 c = get_effective_char(pfile);
660 obstack_1grow (stack, c);
661 c = get_effective_char(pfile);
667 obstack_1grow (stack, c);
668 c = get_effective_char(pfile);
679 if (NT_DEC == num_type)
685 obstack_1grow (stack, c);
686 c = get_effective_char(pfile);
693 obstack_1grow (stack, c);
694 c = get_effective_char(pfile);
697 else if ('E' == c || 'e' == c)
699 if (has_whole || has_fract)
703 obstack_1grow (stack, c);
704 c = get_effective_char(pfile);
717 obstack_1grow (stack, c);
718 c = get_effective_char(pfile);
725 obstack_1grow (stack, c);
726 c = get_effective_char(pfile);
729 else if ('P' == c || 'p' == c)
731 if (has_whole || has_fract)
735 obstack_1grow (stack, c);
736 c = get_effective_char(pfile);
743 num_part = NP_INT_SUFFIX;
747 if (NT_DEC == num_type)
753 obstack_1grow (stack, c);
754 c = get_effective_char(pfile);
757 if ('E' == c || 'e' == c)
759 if (has_whole || has_fract)
763 obstack_1grow (stack, c);
764 c = get_effective_char(pfile);
775 obstack_1grow (stack, c);
776 c = get_effective_char(pfile);
779 if ('P' == c || 'p' == c)
781 if (has_whole || has_fract)
785 obstack_1grow (stack, c);
786 c = get_effective_char(pfile);
791 num_part = NP_FLOAT_SUFFIX;
795 if ('+' == c || '-' == c)
798 obstack_1grow (stack, c);
799 c = get_effective_char(pfile);
805 obstack_1grow (stack, c);
806 c = get_effective_char(pfile);
809 num_part = NP_FLOAT_SUFFIX;
813 if ('L' == c || 'l' == c)
818 obstack_1grow (stack, c);
819 c = get_effective_char(pfile);
824 obstack_1grow (stack, c);
825 c = get_effective_char(pfile);
828 else if ('U' == c || 'u' == c)
831 obstack_1grow (stack, c);
832 c = get_effective_char(pfile);
836 case NP_FLOAT_SUFFIX:
837 if ('F' == c || 'f' == c)
840 obstack_1grow (stack, c);
841 c = get_effective_char(pfile);
843 else if ('L' == c || 'l' == c)
846 obstack_1grow (stack, c);
847 c = get_effective_char(pfile);
854 /* Step back over the unwanted char. */
857 number->text = obstack_finish (stack);
861 /* Parse a number whose first character is at pfile->buffer->cur[-1],
862 skipping embedded backslash-newlines. LEADING_PERIOD is nonzero if
863 there was a "." before that character. Place the result in NUMBER. */
865 parse_number (pfile, number, leading_period)
872 /* Fast-path loop. Skim over a normal number.
873 N.B. ISIDNUM does not include $. */
874 cur = pfile->buffer->cur;
875 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
878 /* Check for slow-path cases. */
879 if (*cur == '?' || *cur == '\\' || *cur == '$')
880 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
883 const uchar *base = pfile->buffer->cur - 1;
886 number->len = cur - base + leading_period;
887 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
888 dest[number->len] = '\0';
893 memcpy (dest, base, cur - base);
894 pfile->buffer->cur = cur;
898 /* Subroutine of parse_string. */
900 unescaped_terminator_p (pfile, dest)
902 const unsigned char *dest;
904 const unsigned char *start, *temp;
906 /* In #include-style directives, terminators are not escapable. */
907 if (pfile->state.angled_headers)
910 start = BUFF_FRONT (pfile->u_buff);
912 /* An odd number of consecutive backslashes represents an escaped terminator. */
914 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
917 return ((dest - temp) & 1) == 0;
920 /* Parses a string, character constant, or angle-bracketed header file
921 name. Handles embedded trigraphs and escaped newlines. The stored
922 string is guaranteed NUL-terminated, but it is not guaranteed that
923 this is the first NUL since embedded NULs are preserved.
925 When this function returns, buffer->cur points to the next
926 character to be processed. */
928 parse_string (pfile, token, terminator)
931 cppchar_t terminator;
933 cpp_buffer *buffer = pfile->buffer;
934 unsigned char *dest, *limit;
936 bool warned_nulls = false;
937 #ifdef MULTIBYTE_CHARS
942 dest = BUFF_FRONT (pfile->u_buff);
943 limit = BUFF_LIMIT (pfile->u_buff);
945 #ifdef MULTIBYTE_CHARS
946 /* Reset multibyte conversion state. */
947 (void) local_mbtowc (NULL, NULL, 0);
951 /* We need room for another char, possibly the terminating NUL. */
952 if ((size_t) (limit - dest) < 1)
954 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
955 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
956 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
957 limit = BUFF_LIMIT (pfile->u_buff);
960 #ifdef MULTIBYTE_CHARS
961 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
962 buffer->rlimit - buffer->cur);
965 cpp_error (pfile, DL_WARNING,
966 "ignoring invalid multibyte character");
972 buffer->cur += char_len;
979 /* Handle trigraphs, escaped newlines etc. */
980 if (c == '?' || c == '\\')
981 c = skip_escaped_newlines (pfile);
985 if (unescaped_terminator_p (pfile, dest))
988 else if (is_vspace (c))
990 /* No string literal may extend over multiple lines. In
991 assembly language, suppress the error except for <>
992 includes. This is a kludge around not knowing where
995 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
996 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
1003 if (buffer->cur - 1 == buffer->rlimit)
1007 warned_nulls = true;
1008 cpp_error (pfile, DL_WARNING,
1009 "null character(s) preserved in literal");
1012 #ifdef MULTIBYTE_CHARS
1015 for ( ; char_len > 0; --char_len)
1016 *dest++ = (*buffer->cur - char_len);
1025 token->val.str.text = BUFF_FRONT (pfile->u_buff);
1026 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
1027 BUFF_FRONT (pfile->u_buff) = dest + 1;
1030 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
1031 comment blocks (when executed with -C option) and
1032 _asm (SDCPP specific) blocks */
1034 /* Count and copy characters from src to dest, excluding CRs:
1035 CRs are automatically generated, because the output is
1036 opened in TEXT mode. If dest == NULL, only count chars */
1038 copy_text_chars (dest, src, len)
1046 for (p = src; p != src + len; ++p)
1061 /* SDCC _asm specific */
1062 /* The stored text is the literal "_asm" followed by the characters from FROM up to the current buffer position, with CRs dropped. */
1064 save_asm (pfile, token, from)
1067 const unsigned char *from;
1069 #define _ASM_STR "_asm"
1070 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
1072 unsigned char *buffer;
1073 unsigned int text_len, len;
1075 len = pfile->buffer->cur - from;
1076 /* + _ASM_LEN for the initial '_asm'. */
1077 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
1078 buffer = _cpp_unaligned_alloc (pfile, text_len);
1081 token->type = CPP_ASM;
1082 token->val.str.len = text_len;
1083 token->val.str.text = buffer;
1085 memcpy (buffer, _ASM_STR, _ASM_LEN);
1086 copy_text_chars (buffer + _ASM_LEN, from, len);
1089 /* The stored comment includes the comment start and any terminator. */
1091 save_comment (pfile, token, from, type)
1094 const unsigned char *from;
1097 unsigned char *buffer;
1098 unsigned int len, clen;
1100 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1102 /* C++ comments probably (not definitely) have moved past a new
1103 line, which we don't want to save in the comment. */
1104 if (is_vspace (pfile->buffer->cur[-1]))
1107 /* If we are currently in a directive, then we need to store all
1108 C++ comments as C comments internally, and so we need to
1109 allocate a little extra space in that case.
1111 Note that the only time we encounter a directive here is
1112 when we are saving comments in a "#define". */
1113 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1115 buffer = _cpp_unaligned_alloc (pfile, clen);
1117 token->type = CPP_COMMENT;
1118 token->val.str.len = clen;
1119 token->val.str.text = buffer;
1122 copy_text_chars (buffer + 1, from, len);
1124 /* Finish conversion to a C comment, if necessary. */
1125 if (pfile->state.in_directive && type == '/')
1128 buffer[clen - 2] = '*';
1129 buffer[clen - 1] = '/';
1133 /* Allocate COUNT tokens for RUN. */
1135 _cpp_init_tokenrun (run, count)
1139 run->base = xnewvec (cpp_token, count);
1140 run->limit = run->base + count;
1144 /* Returns the next tokenrun, or creates one if there is none. */
1149 if (run->next == NULL)
1151 run->next = xnew (tokenrun);
1152 run->next->prev = run;
1153 _cpp_init_tokenrun (run->next, 250);
1159 /* Allocate a single token that is invalidated at the same time as the
1160 rest of the tokens on the line. Has its line and col set to the
1161 same as the last lexed token, so that diagnostics appear in the
1164 _cpp_temp_token (pfile)
1167 cpp_token *old, *result;
1169 old = pfile->cur_token - 1;
1170 if (pfile->cur_token == pfile->cur_run->limit)
1172 pfile->cur_run = next_tokenrun (pfile->cur_run);
1173 pfile->cur_token = pfile->cur_run->base;
1176 result = pfile->cur_token++;
1177 result->line = old->line;
1178 result->col = old->col;
1182 /* Lex a token into RESULT (external interface). Takes care of issues
1183 like directive handling, token lookahead, multiple include
1184 optimization and skipping. */
1186 _cpp_lex_token (pfile)
1193 if (pfile->cur_token == pfile->cur_run->limit)
1195 pfile->cur_run = next_tokenrun (pfile->cur_run);
1196 pfile->cur_token = pfile->cur_run->base;
1199 if (pfile->lookaheads)
1201 pfile->lookaheads--;
1202 result = pfile->cur_token++;
1205 result = _cpp_lex_direct (pfile);
1207 if (result->flags & BOL)
1209 /* Is this a directive? If _cpp_handle_directive returns
1210 false, it is an assembler #. */
1211 if (result->type == CPP_HASH
1212 /* 6.10.3 p 11: Directives in a list of macro arguments
1213 gives undefined behavior. This implementation
1214 handles the directive as normal. */
1215 && pfile->state.parsing_args != 1
1216 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1218 if (pfile->cb.line_change && !pfile->state.skipping)
1219 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1222 /* We don't skip tokens in directives. */
1223 if (pfile->state.in_directive)
1226 /* Outside a directive, invalidate controlling macros. At file
1227 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1228 get here and MI optimisation works. */
1229 pfile->mi_valid = false;
1231 if (!pfile->state.skipping || result->type == CPP_EOF)
1238 /* A NUL terminates the current buffer. For ISO preprocessing this is
1239 EOF, but for traditional preprocessing it indicates we need a line
1240 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
1241 to return a CPP_EOF to the caller. */
1243 continue_after_nul (pfile)
1246 cpp_buffer *buffer = pfile->buffer;
1249 buffer->saved_flags = BOL;
1250 if (CPP_OPTION (pfile, traditional))
1252 if (pfile->state.in_directive)
1255 _cpp_remove_overlay (pfile);
1256 more = _cpp_read_logical_line_trad (pfile);
1257 _cpp_overlay_buffer (pfile, pfile->out.base,
1258 pfile->out.cur - pfile->out.base);
1259 pfile->line = pfile->out.first_line;
1263 /* Stop parsing arguments with a CPP_EOF. When we finally come
1264 back here, do the work of popping the buffer. */
1265 if (!pfile->state.parsing_args)
1267 if (buffer->cur != buffer->line_base)
1269 /* Non-empty files should end in a newline. Don't warn
1270 for command line and _Pragma buffers. */
1271 if (!buffer->from_stage3)
1272 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1273 handle_newline (pfile);
1276 /* Similarly, finish an in-progress directive with CPP_EOF
1277 before popping the buffer. */
1278 if (!pfile->state.in_directive && buffer->prev)
1280 more = !buffer->return_at_eof;
1281 _cpp_pop_buffer (pfile);
1289 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1291 if (get_effective_char (pfile) == CHAR) \
1292 result->type = THEN_TYPE; \
1296 result->type = ELSE_TYPE; \
1300 /* Lex a token into pfile->cur_token, which is also incremented, to
1301 get diagnostics pointing to the correct location.
1303 Does not handle issues such as token lookahead, multiple-include
1304 optimisation, directives, skipping etc. This function is only
1305 suitable for use by _cpp_lex_token, and in special cases like
1306 lex_expansion_token which doesn't care for any of these issues.
1308 When meeting a newline, returns CPP_EOF if parsing a directive,
1309 otherwise returns to the start of the token buffer if permissible.
1310 Returns the location of the lexed token. */
1312 _cpp_lex_direct (pfile)
1317 const unsigned char *comment_start;
1318 cpp_token *result = pfile->cur_token++;
1321 buffer = pfile->buffer;
1322 result->flags = buffer->saved_flags;
1323 buffer->saved_flags = 0;
1325 result->line = pfile->line;
1329 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1334 case ' ': case '\t': case '\f': case '\v': case '\0':
1335 result->flags |= PREV_WHITE;
1336 if (skip_whitespace (pfile, c))
1339 /* End of buffer. */
1341 if (continue_after_nul (pfile))
1343 result->type = CPP_EOF;
1346 case '\n': case '\r':
1347 handle_newline (pfile);
1348 buffer->saved_flags = BOL;
1349 if (! pfile->state.in_directive)
1351 if (pfile->state.parsing_args == 2)
1352 buffer->saved_flags |= PREV_WHITE;
1353 if (!pfile->keep_tokens)
1355 pfile->cur_run = &pfile->base_run;
1356 result = pfile->base_run.base;
1357 pfile->cur_token = result + 1;
1361 result->type = CPP_EOF;
1366 /* These could start an escaped newline, or '?' a trigraph. Let
1367 skip_escaped_newlines do all the work. */
1369 unsigned int line = pfile->line;
1371 c = skip_escaped_newlines (pfile);
1372 if (line != pfile->line)
1375 /* We had at least one escaped newline of some sort.
1376 Update the token's line and column. */
1377 goto update_tokens_line;
1381 /* We are either the original '?' or '\\', or a trigraph. */
1383 result->type = CPP_QUERY;
1390 case '0': case '1': case '2': case '3': case '4':
1391 case '5': case '6': case '7': case '8': case '9':
1392 result->type = CPP_NUMBER;
1393 if (CPP_OPTION(pfile, pedantic_parse_number))
1394 pedantic_parse_number (pfile, &result->val.str, 0);
1396 parse_number (pfile, &result->val.str, 0);
1400 /* 'L' may introduce wide characters or strings. */
1402 const unsigned char *pos = buffer->cur;
1404 c = get_effective_char (pfile);
1405 if (c == '\'' || c == '"')
1407 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1408 parse_string (pfile, result, c);
1417 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1418 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1419 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1420 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1422 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1423 case 'G': case 'H': case 'I': case 'J': case 'K':
1424 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1425 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1427 result->type = CPP_NAME;
1428 result->val.node = parse_identifier (pfile);
1430 /* SDCC _asm specific */
1431 /* handle _asm ... _endasm ; */
1432 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1434 comment_start = buffer->cur;
1435 result->type = CPP_ASM;
1436 skip_asm_block (pfile);
1437 /* Save the _asm block as a token in its own right. */
1438 save_asm (pfile, result, comment_start);
1440 /* Convert named operators to their proper types. */
1441 else if (result->val.node->flags & NODE_OPERATOR)
1443 result->flags |= NAMED_OP;
1444 result->type = result->val.node->value.operator;
1450 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1451 parse_string (pfile, result, c);
1455 /* A potential block or line comment. */
1456 comment_start = buffer->cur;
1457 c = get_effective_char (pfile);
1461 if (skip_block_comment (pfile))
1462 cpp_error (pfile, DL_ERROR, "unterminated comment");
1464 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1465 || CPP_IN_SYSTEM_HEADER (pfile)))
1467 /* Warn about comments only if pedantically GNUC89, and not
1468 in system headers. */
1469 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1470 && ! buffer->warned_cplusplus_comments)
1472 cpp_error (pfile, DL_PEDWARN,
1473 "C++ style comments are not allowed in ISO C90");
1474 cpp_error (pfile, DL_PEDWARN,
1475 "(this will be reported only once per input file)");
1476 buffer->warned_cplusplus_comments = 1;
1479 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1480 cpp_error (pfile, DL_WARNING, "multi-line comment");
1484 result->type = CPP_DIV_EQ;
1490 result->type = CPP_DIV;
1494 if (!pfile->state.save_comments)
1496 result->flags |= PREV_WHITE;
1497 goto update_tokens_line;
1500 /* Save the comment as a token in its own right. */
1501 save_comment (pfile, result, comment_start, c);
1505 if (pfile->state.angled_headers)
1507 result->type = CPP_HEADER_NAME;
1508 parse_string (pfile, result, '>');
1512 c = get_effective_char (pfile);
1514 result->type = CPP_LESS_EQ;
1516 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1517 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1518 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1519 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1521 result->type = CPP_OPEN_SQUARE;
1522 result->flags |= DIGRAPH;
1524 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1526 result->type = CPP_OPEN_BRACE;
1527 result->flags |= DIGRAPH;
1532 result->type = CPP_LESS;
1537 c = get_effective_char (pfile);
1539 result->type = CPP_GREATER_EQ;
1541 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1542 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1543 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1547 result->type = CPP_GREATER;
1552 c = get_effective_char (pfile);
1554 result->type = CPP_MOD_EQ;
1555 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1557 result->flags |= DIGRAPH;
1558 result->type = CPP_HASH;
1559 if (get_effective_char (pfile) == '%')
1561 const unsigned char *pos = buffer->cur;
1563 if (get_effective_char (pfile) == ':')
1564 result->type = CPP_PASTE;
1566 buffer->cur = pos - 1;
1571 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1573 result->flags |= DIGRAPH;
1574 result->type = CPP_CLOSE_BRACE;
1579 result->type = CPP_MOD;
1584 result->type = CPP_DOT;
1585 c = get_effective_char (pfile);
1588 const unsigned char *pos = buffer->cur;
1590 if (get_effective_char (pfile) == '.')
1591 result->type = CPP_ELLIPSIS;
1593 buffer->cur = pos - 1;
1595 /* All known character sets have 0...9 contiguous. */
1596 else if (ISDIGIT (c))
1598 result->type = CPP_NUMBER;
1599 if (CPP_OPTION(pfile, pedantic_parse_number))
1600 pedantic_parse_number (pfile, &result->val.str, 1);
1602 parse_number (pfile, &result->val.str, 1);
1604 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1605 result->type = CPP_DOT_STAR;
1611 c = get_effective_char (pfile);
1613 result->type = CPP_PLUS_PLUS;
1615 result->type = CPP_PLUS_EQ;
1619 result->type = CPP_PLUS;
1624 c = get_effective_char (pfile);
1627 result->type = CPP_DEREF;
1628 if (CPP_OPTION (pfile, cplusplus))
1630 if (get_effective_char (pfile) == '*')
1631 result->type = CPP_DEREF_STAR;
1637 result->type = CPP_MINUS_MINUS;
1639 result->type = CPP_MINUS_EQ;
1643 result->type = CPP_MINUS;
1648 c = get_effective_char (pfile);
1650 result->type = CPP_AND_AND;
1652 result->type = CPP_AND_EQ;
1656 result->type = CPP_AND;
1661 c = get_effective_char (pfile);
1663 result->type = CPP_OR_OR;
1665 result->type = CPP_OR_EQ;
1669 result->type = CPP_OR;
1674 c = get_effective_char (pfile);
1675 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1676 result->type = CPP_SCOPE;
1677 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1679 result->flags |= DIGRAPH;
1680 result->type = CPP_CLOSE_SQUARE;
1685 result->type = CPP_COLON;
1689 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1690 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1691 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1692 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1693 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1695 case '~': result->type = CPP_COMPL; break;
1696 case ',': result->type = CPP_COMMA; break;
1697 case '(': result->type = CPP_OPEN_PAREN; break;
1698 case ')': result->type = CPP_CLOSE_PAREN; break;
1699 case '[': result->type = CPP_OPEN_SQUARE; break;
1700 case ']': result->type = CPP_CLOSE_SQUARE; break;
1701 case '{': result->type = CPP_OPEN_BRACE; break;
1702 case '}': result->type = CPP_CLOSE_BRACE; break;
1703 case ';': result->type = CPP_SEMICOLON; break;
1705 /* @ is a punctuator in Objective-C. */
1706 case '@': result->type = CPP_ATSIGN; break;
1709 if (CPP_OPTION (pfile, dollars_in_ident))
1711 /* Fall through... */
1715 result->type = CPP_OTHER;
1723 /* An upper bound on the number of bytes needed to spell TOKEN,
1724 including preceding whitespace. */
/* The spelling category from TOKEN_SPELL selects the base length:
   the stored string length for SPELL_STRING tokens, the identifier
   node's length for SPELL_IDENT tokens, and 0 for every other
   category.  */
1726 cpp_token_len (token)
1727 const cpp_token *token;
1731 switch (TOKEN_SPELL (token))
1733 default: len = 0; break;
1735 case SPELL_STRING: len = token->val.str.len; break;
1736 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1738 /* 1 for whitespace, 4 for comment delimiters. */
/* Dispatches on TOKEN_SPELL (token) and copies the matching spelling
   into BUFFER, advancing BUFFER past each byte written.  PFILE is
   used here only to report internal errors through cpp_error.  */
1742 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1743 already contain the enough space to hold the token's spelling.
1744 Returns a pointer to the character after the last character
1747 cpp_spell_token (pfile, token, buffer)
1748 cpp_reader *pfile; /* Would be nice to be rid of this... */
1749 const cpp_token *token;
1750 unsigned char *buffer;
1752 switch (TOKEN_SPELL (token))
1754 case SPELL_OPERATOR:
1756 const unsigned char *spelling;
/* A DIGRAPH token is spelt from digraph_spellings, indexed by its
   offset from CPP_FIRST_DIGRAPH; TOKEN_NAME supplies the spelling
   from the token_spellings table.  */
1759 if (token->flags & DIGRAPH)
1761 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1762 else if (token->flags & NAMED_OP)
1765 spelling = TOKEN_NAME (token);
/* Walk SPELLING up to its terminating NUL.  */
1767 while ((c = *spelling++) != '\0')
/* A token that carries a single character stores it directly.  */
1773 *buffer++ = token->val.c;
/* Identifier: copy NODE_LEN bytes of the node's name.  */
1778 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1779 buffer += NODE_LEN (token->val.node);
/* Raw string payload, copied without delimiters.  */
1783 memcpy (buffer, token->val.str.text, token->val.str.len);
1784 buffer += token->val.str.len;
/* Quoted forms: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix ('L' for the wide variants, '\0' for none).
   NOTE(review): the equivalent switch in cpp_output_token also lists
   CPP_ASM; here an unlisted type appears to reach the DL_ICE report
   below.  Confirm whether CPP_ASM tokens can arrive here.  */
1789 int left, right, tag;
1790 switch (token->type)
1792 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1793 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1794 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1795 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1796 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1798 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1799 TOKEN_NAME (token));
/* The tag is emitted only when it is non-zero.  */
1802 if (tag) *buffer++ = tag;
1804 memcpy (buffer, token->val.str.text, token->val.str.len);
1805 buffer += token->val.str.len;
/* Internal error report for a token that has no spelling.  */
1811 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1818 /* Returns TOKEN spelt as a null-terminated string. The string is
1819 freed when the reader is destroyed. Useful for diagnostics. */
1821 cpp_token_as_text (pfile, token)
1823 const cpp_token *token;
/* Size the buffer with cpp_token_len's upper bound and take it from
   the reader through _cpp_unaligned_alloc.  */
1825 unsigned int len = cpp_token_len (token);
1826 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* END receives the pointer cpp_spell_token returns after writing
   TOKEN's spelling at START.  */
1828 end = cpp_spell_token (pfile, token, start);
1834 /* Used by C front ends, which really should move to using
1835 cpp_token_as_text. */
/* TYPE indexes token_spellings directly; the table's name pointer is
   returned cast from unsigned char to plain char.  */
1837 cpp_type2name (type)
1838 enum cpp_ttype type;
1840 return (const char *) token_spellings[type].name;
1843 /* Writes the spelling of token to FP, without any preceding space.
1844 Separated from cpp_spell_token for efficiency - to avoid stdio
1845 double-buffering. */
/* Dispatches on TOKEN_SPELL (token) and emits the spelling with
   putc/fwrite on FP.  */
1847 cpp_output_token (token, fp)
1848 const cpp_token *token;
1851 switch (TOKEN_SPELL (token))
1853 case SPELL_OPERATOR:
1855 const unsigned char *spelling;
/* A DIGRAPH token is spelt from digraph_spellings, indexed by its
   offset from CPP_FIRST_DIGRAPH; TOKEN_NAME supplies the spelling
   from the token_spellings table.  */
1858 if (token->flags & DIGRAPH)
1860 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1861 else if (token->flags & NAMED_OP)
1864 spelling = TOKEN_NAME (token);
/* Pre-increment form: SPELLING is advanced before the next byte is
   read, and iteration stops at the NUL.  The trailing ';' suggests
   this closes a do-while whose body is not visible here (TODO
   confirm).  */
1869 while ((c = *++spelling) != '\0');
/* A token that carries a single character.  */
1874 putc (token->val.c, fp);
/* Identifier: write NODE_LEN bytes of the node's name.  */
1879 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Raw string payload, written without delimiters.  */
1883 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* Quoted forms: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix.  CPP_ASM sets all three to '\0', so its text is
   written bare by the guarded putc calls below.  */
1888 int left, right, tag;
1889 switch (token->type)
1891 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1892 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1893 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1894 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1895 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1896 case CPP_ASM: left = '\0'; right = '\0'; tag = '\0'; break;
/* Reported straight to stderr: this function is given no cpp_reader
   to hand to cpp_error.  */
1898 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
/* Each of tag, left and right is written only when non-zero.  */
1901 if (tag) putc (tag, fp);
1902 if (left) putc (left, fp);
1903 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1904 if (right) putc (right, fp);
1909 /* An error, most probably. */
1914 /* Compare two tokens. */
1916 _cpp_equiv_tokens (a, b)
1917 const cpp_token *a, *b;
/* Only tokens with identical type and flags are compared further;
   the comparison then depends on the spelling category of A.  */
1919 if (a->type == b->type && a->flags == b->flags)
1920 switch (TOKEN_SPELL (a))
1922 default: /* Keep compiler happy. */
1923 case SPELL_OPERATOR:
1926 return a->val.c == b->val.c; /* Character. */
/* For a CPP_MACRO_ARG token the argument numbers must match too;
   any other type passes this test unconditionally.  */
1928 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Identifier nodes are compared by pointer.  */
1930 return a->val.node == b->val.node;
/* String payloads: equal length, then a memcmp of the text.  */
1933 return (a->val.str.len == b->val.str.len
1934 && !memcmp (a->val.str.text, b->val.str.text,
1941 /* Returns nonzero if a space should be inserted to avoid an
1942 accidental token paste for output. For simplicity, it is
1943 conservative, and occasionally advises a space where one is not
1944 needed, e.g. "." and ".2". */
/* A and B are the types of TOKEN1 and TOKEN2.  C is the first
   character of TOKEN2's spelling when TOKEN2 is a digraph or belongs
   to the SPELL_OPERATOR category.  */
1946 cpp_avoid_paste (pfile, token1, token2)
1948 const cpp_token *token1, *token2;
1950 enum cpp_ttype a = token1->type, b = token2->type;
/* Named operators are tested on both sides before the
   character-based checks; the actions taken are not visible here.  */
1953 if (token1->flags & NAMED_OP)
1955 if (token2->flags & NAMED_OP)
1959 if (token2->flags & DIGRAPH)
1960 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1961 else if (token_spellings[b].category == SPELL_OPERATOR)
1962 c = token_spellings[b].name[0];
1964 /* Quickly get everything that can paste with an '='. */
1965 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per-type table: each entry lists the leading characters (or token
   types) of TOKEN2 that would merge with TOKEN1 into a different
   token.  */
1970 case CPP_GREATER: return c == '>' || c == '?';
1971 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1972 case CPP_PLUS: return c == '+';
1973 case CPP_MINUS: return c == '-' || c == '>';
1974 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1975 case CPP_MOD: return c == ':' || c == '>';
1976 case CPP_AND: return c == '&';
1977 case CPP_OR: return c == '|';
1978 case CPP_COLON: return c == ':' || c == '>';
1979 case CPP_DEREF: return c == '*';
1980 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1981 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1982 case CPP_NAME: return ((b == CPP_NUMBER
1983 && name_p (pfile, &token2->val.str))
1985 || b == CPP_CHAR || b == CPP_STRING); /* L */
1986 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1987 || c == '.' || c == '+' || c == '-');
1988 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1989 && token1->val.c == '@'
1990 && (b == CPP_NAME || b == CPP_STRING));
1997 /* Output all the remaining tokens on the current line, and a newline
1998 character, to FP. Leading whitespace is removed. If there are
1999 macros, special token padding is not performed. */
2001 cpp_output_line (pfile, fp)
2005 const cpp_token *token;
/* Tokens are pulled with cpp_get_token until CPP_EOF.  Each one is
   written with cpp_output_token, then the next token is fetched and
   its PREV_WHITE flag is tested; the action taken on that flag is
   not visible here.  */
2007 token = cpp_get_token (pfile);
2008 while (token->type != CPP_EOF)
2010 cpp_output_token (token, fp);
2011 token = cpp_get_token (pfile);
2012 if (token->flags & PREV_WHITE)
2019 /* Returns the value of a hexadecimal digit. */
/* The visible body forwards C to hex_value and returns its result.  */
2025 return hex_value (c);
2030 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
2031 failure if cpplib is not parsing C++ or C99. Such failure is
2032 silent, and no variables are updated. Otherwise returns 0, and
2033 warns if -Wtraditional.
2035 [lex.charset]: The character designated by the universal character
2036 name \UNNNNNNNN is that character whose character short name in
2037 ISO/IEC 10646 is NNNNNNNN; the character designated by the
2038 universal character name \uNNNN is that character whose character
2039 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
2040 for a universal character name is less than 0x20 or in the range
2041 0x7F-0x9F (inclusive), or if the universal character name
2042 designates a character in the basic source character set, then the
2043 program is ill-formed.
2045 We assume that wchar_t is Unicode, so we don't need to do any
2046 mapping. Is this ever wrong?
2048 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
2049 LIMIT is the end of the string or charconst. PSTR is updated to
2050 point after the UCS on return, and the UCS is written into PC. */
/* CODE accumulates the hex digits read through P.  LENGTH is the
   number of digits still expected (4 for 'u', otherwise 8); it
   reaches zero only when the digit loop below runs to completion.  */
2053 maybe_read_ucs (pfile, pstr, limit, pc)
2055 const unsigned char **pstr;
2056 const unsigned char *limit;
2059 const unsigned char *p = *pstr;
2060 unsigned int code = 0;
2061 unsigned int c = *pc, length;
2063 /* Only attempt to interpret a UCS for C++ and C99. */
2064 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
2067 if (CPP_WTRADITIONAL (pfile))
2068 cpp_error (pfile, DL_WARNING,
2069 "the meaning of '\\%c' is different in traditional C", c);
2071 length = (c == 'u' ? 4: 8);
/* Fewer bytes remain before LIMIT than the name requires.  */
2073 if ((size_t) (limit - p) < length)
2075 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
2076 /* Skip to the end to avoid more diagnostics. */
/* One hex digit per iteration, shifted into CODE four bits at a
   time.  */
2081 for (; length; length--, p++)
2085 code = (code << 4) + hex_digit_value (c);
2088 cpp_error (pfile, DL_ERROR,
2089 "non-hex digit '%c' in universal-character-name", c);
2090 /* We shouldn't skip in case there are multibyte chars. */
2096 #ifdef TARGET_EBCDIC
2097 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
2098 code = 0x3f; /* EBCDIC invalid character */
/* Visible parts of the acceptance test: values with the top bit set
   and the range 0xD800-0xDFFF are excluded.  */
2100 /* True extended characters are OK. */
2102 && !(code & 0x80000000)
2103 && !(code >= 0xD800 && code <= 0xDFFF))
2105 /* The standard permits $, @ and ` to be specified as UCNs. We use
2106 hex escapes so that this also works with EBCDIC hosts. */
2107 else if (code == 0x24 || code == 0x40 || code == 0x60)
2109 /* Don't give another error if one occurred above. */
2110 else if (length == 0)
2111 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
2119 /* Returns the value of an escape sequence, truncated to the correct
2120 target precision. PSTR points to the input pointer, which is just
2121 after the backslash. LIMIT is how much text we have. WIDE is true
2122 if the escape sequence is part of a wide character constant or
2123 string literal. Handles all relevant diagnostics. */
2125 cpp_parse_escape (pfile, pstr, limit, wide)
2127 const unsigned char **pstr;
2128 const unsigned char *limit;
2132 const unsigned char *str = *pstr;
/* WIDTH is the target precision in bits, read from wchar_precision
   or char_precision; presumably WIDE selects between the two (the
   test itself is not visible here).  */
2137 width = CPP_OPTION (pfile, wchar_precision);
2139 width = CPP_OPTION (pfile, char_precision);
/* MASK keeps the low WIDTH bits.  The shift is only formed when
   WIDTH is narrower than cppchar_t.  */
2140 if (width < BITS_PER_CPPCHAR_T)
2141 mask = ((cppchar_t) 1 << width) - 1;
/* Escapes that stand for the escaped character itself.  */
2148 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the target's control-character values.  */
2149 case 'b': c = TARGET_BS; break;
2150 case 'f': c = TARGET_FF; break;
2151 case 'n': c = TARGET_NEWLINE; break;
2152 case 'r': c = TARGET_CR; break;
2153 case 't': c = TARGET_TAB; break;
2154 case 'v': c = TARGET_VT; break;
2156 case '(': case '{': case '[': case '%':
2157 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
2158 '\%' is used to prevent SCCS from getting confused. */
2159 unknown = CPP_PEDANTIC (pfile);
2163 if (CPP_WTRADITIONAL (pfile))
2164 cpp_error (pfile, DL_WARNING,
2165 "the meaning of '\\a' is different in traditional C");
2170 if (CPP_PEDANTIC (pfile))
2171 cpp_error (pfile, DL_PEDWARN,
2172 "non-ISO-standard escape sequence, '\\%c'", (int) c);
/* Universal character names are delegated to maybe_read_ucs; its
   return value becomes the "unknown escape" flag.  */
2177 unknown = maybe_read_ucs (pfile, &str, limit, &c);
2181 if (CPP_WTRADITIONAL (pfile))
2182 cpp_error (pfile, DL_WARNING,
2183 "the meaning of '\\x' is different in traditional C");
2186 cppchar_t i = 0, overflow = 0;
2187 int digits_found = 0;
/* i ^ (i << 4 >> 4) isolates the top four bits of I, which the next
   shift would discard; any set bit is recorded in OVERFLOW before
   the next digit is accumulated.  */
2195 overflow |= i ^ (i << 4 >> 4);
2196 i = (i << 4) + hex_digit_value (c);
2201 cpp_error (pfile, DL_ERROR,
2202 "\\x used with no following hex digits");
/* Out of range when bits were lost or the value does not fit in
   MASK.  */
2204 if (overflow | (i != (i & mask)))
2206 cpp_error (pfile, DL_PEDWARN,
2207 "hex escape sequence out of range");
2214 case '0': case '1': case '2': case '3':
2215 case '4': case '5': case '6': case '7':
/* The first octal digit is already held in I; the loop admits
   further digits only while ++count stays below 3 and text
   remains.  */
2218 cppchar_t i = c - '0';
2220 while (str < limit && ++count < 3)
2223 if (c < '0' || c > '7')
2226 i = (i << 3) + c - '0';
2229 if (i != (i & mask))
2231 cpp_error (pfile, DL_PEDWARN,
2232 "octal escape sequence out of range");
/* Two renderings of the unknown-escape diagnostic: the character
   itself, or its value in octal.  The choice between them is not
   visible here.  */
2247 cpp_error (pfile, DL_PEDWARN,
2248 "unknown escape sequence '\\%c'", (int) c);
2250 cpp_error (pfile, DL_PEDWARN,
2251 "unknown escape sequence: '\\%03o'", (int) c);
2256 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2264 /* Interpret a (possibly wide) character constant in TOKEN.
2265 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
2266 points to a variable that is filled in with the number of
2267 characters seen, and UNSIGNEDP to a variable that indicates whether
2268 the result has signed type. */
2270 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2272 const cpp_token *token;
2273 unsigned int *pchars_seen;
/* STR walks the token's text up to LIMIT; RESULT accumulates the
   value and CHARS_SEEN counts the characters consumed.  */
2276 const unsigned char *str = token->val.str.text;
2277 const unsigned char *limit = str + token->val.str.len;
2278 unsigned int chars_seen = 0;
2279 size_t width, max_chars;
2280 cppchar_t c, mask, result = 0;
2283 #ifdef MULTIBYTE_CHARS
2284 (void) local_mbtowc (NULL, NULL, 0);
2287 /* Width in bits. */
/* For CPP_CHAR, MAX_CHARS is the number of WIDTH-bit characters that
   fit in the target int, and signedness follows the unsigned_char
   option.  */
2288 if (token->type == CPP_CHAR)
2290 width = CPP_OPTION (pfile, char_precision);
2291 max_chars = CPP_OPTION (pfile, int_precision) / width;
2292 unsigned_p = CPP_OPTION (pfile, unsigned_char);
2296 width = CPP_OPTION (pfile, wchar_precision);
2298 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
/* MASK keeps the low WIDTH bits.  The shift is only formed when
   WIDTH is narrower than cppchar_t.  */
2301 if (width < BITS_PER_CPPCHAR_T)
2302 mask = ((cppchar_t) 1 << width) - 1;
2308 #ifdef MULTIBYTE_CHARS
2312 char_len = local_mbtowc (&wc, str, limit - str);
2315 cpp_error (pfile, DL_WARNING,
2316 "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which is told
   whether the token is a wide character constant.  */
2329 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2331 #ifdef MAP_CHARACTER
2333 c = MAP_CHARACTER (c);
2338 /* Truncate the character, scale the result and merge the two. */
2340 if (width < BITS_PER_CPPCHAR_T)
2341 result = (result << width) | c;
2346 if (chars_seen == 0)
2347 cpp_error (pfile, DL_ERROR, "empty character constant");
2348 else if (chars_seen > 1)
2350 /* Multichar charconsts are of type int and therefore signed. */
/* More characters than fit: CHARS_SEEN is clamped to MAX_CHARS and a
   warning is issued; otherwise the multi-character warning is
   subject to the warn_multichar option.  */
2353 if (chars_seen > max_chars)
2355 chars_seen = max_chars;
2356 cpp_error (pfile, DL_WARNING,
2357 "character constant too long for its type");
2359 else if (CPP_OPTION (pfile, warn_multichar))
2360 cpp_error (pfile, DL_WARNING, "multi-character character constant");
2363 /* Sign-extend or truncate the constant to cppchar_t. The value is
2364 in WIDTH bits, but for multi-char charconsts it's value is the
2365 full target type's width. */
/* NOTE(review): the sign-bit test below shifts a plain int 1, unlike
   the (cppchar_t) 1 used for MASK.  Confirm WIDTH - 1 can never
   reach the bit width of int on supported hosts.  */
2368 if (width < BITS_PER_CPPCHAR_T)
2370 mask = ((cppchar_t) 1 << width) - 1;
2371 if (unsigned_p || !(result & (1 << (width - 1))))
/* Report the character count and signedness through the out
   parameters.  */
2377 *pchars_seen = chars_seen;
2378 *unsignedp = unsigned_p;
2382 /* Memory buffers. Changing these three constants can have a dramatic
2383 effect on performance. The values here are reasonable defaults,
2384 but might be tuned. If you adjust them, be sure to test across a
2385 range of uses of cpplib, including heavy nested function-like macro
2386 expansion. Also check the change in peak memory usage (NJAMD is a
2387 good tool for this). */
/* MIN_BUFF_SIZE is the floor new_buff applies to a requested length.
   BUFF_SIZE_UPPER_BOUND limits how oversized a recycled buffer may
   be in _cpp_get_buff.  EXTENDED_BUFF_SIZE sizes the replacement
   buffer used by the two extend routines: MIN_EXTRA plus twice the
   bytes between cur and limit.
   NOTE(review): MIN_EXTRA is not parenthesised in the expansion;
   the callers in this file pass a plain identifier.  */
2388 #define MIN_BUFF_SIZE 8000
2389 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
2390 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2391 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check relating the two size macros.  */
2393 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2394 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2397 /* Create a new allocation buffer. Place the control block at the end
2398 of the buffer, so that buffer overflows will cause immediate chaos. */
2404 unsigned char *base;
/* Raise LEN to at least MIN_BUFF_SIZE, then round it with
   CPP_ALIGN.  */
2406 if (len < MIN_BUFF_SIZE)
2407 len = MIN_BUFF_SIZE;
2408 len = CPP_ALIGN (len);
/* One xmalloc covers the data area plus the _cpp_buff control block,
   which sits immediately after the LEN data bytes.  */
2410 base = xmalloc (len + sizeof (_cpp_buff));
2411 result = (_cpp_buff *) (base + len);
2412 result->base = base;
/* LIMIT is the end of the data area, which is also the address of
   the control block itself.  */
2414 result->limit = base + len;
2415 result->next = NULL;
2419 /* Place a chain of unwanted allocation buffers on the free list. */
2421 _cpp_release_buff (pfile, buff)
/* END is presumably advanced to the last buffer of the chain by a
   loop not visible here (TODO confirm).  The chain is then spliced
   in front of pfile->free_buffs, making BUFF the new head.  */
2425 _cpp_buff *end = buff;
2429 end->next = pfile->free_buffs;
2430 pfile->free_buffs = buff;
2433 /* Return a free buffer of size at least MIN_SIZE. */
/* P walks the links of pfile->free_buffs.  A fresh buffer comes from
   new_buff on the visible fallback path.  A recycled buffer is
   accepted only when its capacity (limit - base) is at least
   MIN_SIZE and at most BUFF_SIZE_UPPER_BOUND (min_size).  The
   visible tail clears the chosen buffer's next link and resets its
   cur pointer to base.  */
2435 _cpp_get_buff (pfile, min_size)
2439 _cpp_buff *result, **p;
2441 for (p = &pfile->free_buffs;; p = &(*p)->next)
2446 return new_buff (min_size);
2448 size = result->limit - result->base;
2449 /* Return a buffer that's big enough, but don't waste one that's
2451 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2456 result->next = NULL;
2457 result->cur = result->base;
2461 /* Creates a new buffer with enough space to hold the uncommitted
2462 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2463 the excess bytes to the new buffer. Chains the new buffer after
2464 BUFF, and returns the new buffer. */
2466 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The requested size comes from EXTENDED_BUFF_SIZE and the buffer
   itself from _cpp_get_buff.  */
2471 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2472 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes
   starting at BUFF's cur pointer to the base of the new buffer.  */
2474 buff->next = new_buff;
2475 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2479 /* Creates a new buffer with enough space to hold the uncommitted
2480 remaining bytes of the buffer pointed to by BUFF, and at least
2481 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2482 Chains the new buffer before the buffer pointed to by BUFF, and
2483 updates the pointer to point to the new buffer. */
2485 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer currently referenced through PBUFF; the
   requested size comes from EXTENDED_BUFF_SIZE.  */
2490 _cpp_buff *new_buff, *old_buff = *pbuff;
2491 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Copy BUFF_ROOM (old_buff) bytes from the old cur pointer to the
   base of the new buffer, then link the old buffer behind the new
   one.  */
2493 new_buff = _cpp_get_buff (pfile, size);
2494 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2495 new_buff->next = old_buff;
2499 /* Free a chain of buffers starting at BUFF. */
2501 _cpp_free_buff (buff)
/* Iterates until BUFF is null, stepping through NEXT.  NEXT is
   presumably captured from buff->next inside the loop body before
   the buffer is released; the body is not visible here (TODO
   confirm).  */
2506 for (; buff; buff = next)
2513 /* Allocate permanent, unaligned storage of length LEN. */
2515 _cpp_unaligned_alloc (pfile, len)
/* Storage is carved from the head buffer of pfile->u_buff, starting
   at its cur pointer.  */
2519 _cpp_buff *buff = pfile->u_buff;
2520 unsigned char *result = buff->cur;
/* Not enough room left before LIMIT: obtain a buffer of at least
   LEN bytes and push it on the front of the u_buff chain.  */
2522 if (len > (size_t) (buff->limit - result))
2524 buff = _cpp_get_buff (pfile, len);
2525 buff->next = pfile->u_buff;
2526 pfile->u_buff = buff;
/* Commit the LEN bytes by advancing the buffer's cur pointer.  */
2530 buff->cur = result + len;
2534 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2535 That buffer is used for growing allocations when saving macro
2536 replacement lists in a #define, and when parsing an answer to an
2537 assertion in #assert, #unassert or #if (and therefore possibly
2538 whilst expanding macros). It therefore must not be used by any
2539 code that they might call: specifically the lexer and the guts of
2542 All existing other uses clearly fit this restriction: storing
2543 registered pragmas during initialization. */
/* NOTE(review): the header above says "unaligned" although the
   function is named _cpp_aligned_alloc, and no alignment of LEN is
   visible in the lines below.  Confirm the intended wording.  */
2545 _cpp_aligned_alloc (pfile, len)
/* Storage is carved from the head buffer of pfile->a_buff, starting
   at its cur pointer.  */
2549 _cpp_buff *buff = pfile->a_buff;
2550 unsigned char *result = buff->cur;
/* Not enough room left before LIMIT: obtain a buffer of at least
   LEN bytes and push it on the front of the a_buff chain.  */
2552 if (len > (size_t) (buff->limit - result))
2554 buff = _cpp_get_buff (pfile, len);
2555 buff->next = pfile->a_buff;
2556 pfile->a_buff = buff;
/* Commit the LEN bytes by advancing the buffer's cur pointer.  */
2560 buff->cur = result + len;