1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
39 enum spell_type category;
40 const unsigned char *name;
43 static const unsigned char *const digraph_spellings[] =
44 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, UC s },
47 #define TK(e, s) { SPELL_ ## s, UC #e },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
56 static int skip_line_comment (cpp_reader *);
57 static void skip_whitespace (cpp_reader *, cppchar_t);
58 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
59 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
60 static void store_comment (cpp_reader *, cpp_token *);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
72 /* Compares the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical line. */
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
103 _cpp_clean_line (cpp_reader *pfile)
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
115 if (!buffer->from_stage3)
117 const uchar *pbackslash = NULL;
119 /* Short circuit for the common case of an un-escaped line with
120 no trigraphs. The primary win here is by not writing any
121 data back to memory until we have to. */
125 if (__builtin_expect (c == '\n', false)
126 || __builtin_expect (c == '\r', false))
130 if (__builtin_expect (s == buffer->rlimit, false))
133 /* DOS line ending? */
134 if (__builtin_expect (c == '\r', false)
138 if (s == buffer->rlimit)
142 if (__builtin_expect (pbackslash == NULL, true))
145 /* Check for escaped newline. */
147 while (is_nvspace (p[-1]))
149 if (p - 1 != pbackslash)
152 /* Have an escaped newline; process it and proceed to the slow path. */
154 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
156 buffer->next_line = p - 1;
159 if (__builtin_expect (c == '\\', false))
161 else if (__builtin_expect (c == '?', false)
162 && __builtin_expect (s[1] == '?', false)
163 && _cpp_trigraph_map[s[2]])
165 /* Have a trigraph. We may or may not have to convert
166 it. Add a line note regardless, for -Wtrigraphs. */
167 add_line_note (buffer, s, s[2]);
168 if (CPP_OPTION (pfile, trigraphs))
170 /* We do, and that means we have to switch to the slow path. */
173 *d = _cpp_trigraph_map[s[2]];
186 if (c == '\n' || c == '\r')
188 /* Handle DOS line endings. */
189 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
191 if (s == buffer->rlimit)
196 while (p != buffer->next_line && is_nvspace (p[-1]))
198 if (p == buffer->next_line || p[-1] != '\\')
201 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
203 buffer->next_line = p - 1;
205 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
207 /* Add a note regardless, for the benefit of -Wtrigraphs. */
208 add_line_note (buffer, d, s[2]);
209 if (CPP_OPTION (pfile, trigraphs))
211 *d = _cpp_trigraph_map[s[2]];
221 while (*s != '\n' && *s != '\r');
224 /* Handle DOS line endings. */
225 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
231 /* A sentinel note that should never be processed. */
232 add_line_note (buffer, d + 1, '\n');
233 buffer->next_line = s + 1;
236 /* Return true if the trigraph indicated by NOTE should be warned
237 about in a comment. */
239 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
243 /* Within comments we don't warn about trigraphs, unless the
244 trigraph forms an escaped newline, as that may change behavior. */
246 if (note->type != '/')
249 /* If -trigraphs, then this was an escaped newline iff the next note is coincident. */
251 if (CPP_OPTION (pfile, trigraphs))
252 return note[1].pos == note->pos;
254 /* Otherwise, see if this forms an escaped newline. */
256 while (is_nvspace (*p))
259 /* There might have been escaped newlines between the trigraph and the
260 newline we found. Hence the position test. */
261 return (*p == '\n' && p < note[1].pos);
264 /* Process the notes created by add_line_note as far as the current location. */
267 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
269 cpp_buffer *buffer = pfile->buffer;
273 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
276 if (note->pos > buffer->cur)
280 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
282 if (note->type == '\\' || note->type == ' ')
284 if (note->type == ' ' && !in_comment)
285 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
286 "backslash and newline separated by space");
288 if (buffer->next_line > buffer->rlimit)
290 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
291 "backslash-newline at end of file");
292 /* Prevent "no newline at end of file" warning. */
293 buffer->next_line = buffer->rlimit;
296 buffer->line_base = note->pos;
297 CPP_INCREMENT_LINE (pfile, 0);
299 else if (_cpp_trigraph_map[note->type])
301 if (CPP_OPTION (pfile, warn_trigraphs)
302 && (!in_comment || warn_in_comment (pfile, note)))
304 if (CPP_OPTION (pfile, trigraphs))
305 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
306 "trigraph ??%c converted to %c",
308 (int) _cpp_trigraph_map[note->type]);
312 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
313 "trigraph ??%c ignored, use -trigraphs to enable",
323 /* SDCC _asm specific */
324 /* Skip an _asm ... _endasm block. We find the end of the comment by
325 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero otherwise. */
328 skip_asm_block (cpp_reader *pfile)
330 #define _ENDASM_STR "endasm"
331 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
333 cpp_buffer *buffer = pfile->buffer;
338 while (buffer->cur != buffer->rlimit)
340 prev_space = is_space(c);
343 if (prev_space && c == '_')
345 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
346 strncmp((char *)buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
348 buffer->cur += _ENDASM_LEN;
357 _cpp_process_line_notes (pfile, true);
358 if (buffer->next_line >= buffer->rlimit)
360 _cpp_clean_line (pfile);
362 cols = buffer->next_line - buffer->line_base;
363 CPP_INCREMENT_LINE (pfile, cols);
367 _cpp_process_line_notes (pfile, true);
371 /* Skip a C-style block comment. We find the end of the comment by
372 seeing if an asterisk is before every '/' we encounter. Returns
373 nonzero if comment terminated by EOF, zero otherwise.
375 Buffer->cur points to the initial asterisk of the comment. */
377 _cpp_skip_block_comment (cpp_reader *pfile)
379 cpp_buffer *buffer = pfile->buffer;
380 const uchar *cur = buffer->cur;
389 /* People like decorating comments with '*', so check for '/'
390 instead for efficiency. */
398 /* Warn about potential nested comments, but not if the '/'
399 comes immediately before the true comment delimiter.
400 Don't bother to get it right across escaped newlines. */
401 if (CPP_OPTION (pfile, warn_comments)
402 && cur[0] == '*' && cur[1] != '/')
405 cpp_error_with_line (pfile, CPP_DL_WARNING,
406 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
407 "\"/*\" within comment");
413 buffer->cur = cur - 1;
414 _cpp_process_line_notes (pfile, true);
415 if (buffer->next_line >= buffer->rlimit)
417 _cpp_clean_line (pfile);
419 cols = buffer->next_line - buffer->line_base;
420 CPP_INCREMENT_LINE (pfile, cols);
427 _cpp_process_line_notes (pfile, true);
431 /* Skip a C++ line comment, leaving buffer->cur pointing to the
432 terminating newline. Handles escaped newlines. Returns nonzero
433 if a multiline comment. */
435 skip_line_comment (cpp_reader *pfile)
437 cpp_buffer *buffer = pfile->buffer;
438 source_location orig_line = pfile->line_table->highest_line;
440 while (*buffer->cur != '\n')
443 _cpp_process_line_notes (pfile, true);
444 return orig_line != pfile->line_table->highest_line;
447 /* Skips whitespace, saving the next non-whitespace character. */
449 skip_whitespace (cpp_reader *pfile, cppchar_t c)
451 cpp_buffer *buffer = pfile->buffer;
452 bool saw_NUL = false;
456 /* Horizontal space always OK. */
457 if (c == ' ' || c == '\t')
459 /* Just \f \v or \0 left. */
462 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
463 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
464 CPP_BUF_COL (buffer),
465 "%s in preprocessing directive",
466 c == '\f' ? "form feed" : "vertical tab");
470 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
471 while (is_nvspace (c));
474 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
479 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
482 name_p (cpp_reader *pfile, const cpp_string *string)
486 for (i = 0; i < string->len; i++)
487 if (!is_idchar (string->text[i]))
493 /* After parsing an identifier or other sequence, produce a warning about
494 sequences not in NFC/NFKC. */
/* PFILE supplies the warn_normalize option and the skipping state, TOKEN
   is the token just lexed, and S is the normalization state gathered
   while lexing it.  */
496 warn_about_normalization (cpp_reader *pfile,
497 const cpp_token *token,
498 const struct normalize_state *s)
/* Diagnose only when the result recorded in S compares greater than the
   configured warn_normalize level, and never while skipping.  */
500 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
501 && !pfile->state.skipping)
503 /* Make sure that the token is printed using UCNs, even
504 if we'd otherwise happily print UTF-8. */
/* Scratch buffer for the spelled token, sized by cpp_token_len.  */
505 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
/* The difference between the pointer returned by cpp_spell_token and BUF
   is used as the spelled length; it is passed as the precision of the
   string conversion in the messages below.  */
508 sz = cpp_spell_token (pfile, token, buf, false) - buf;
/* A result of exactly normalized_C is reported with the NFKC wording.  */
509 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
510 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
511 "`%.*s' is not in NFKC", (int) sz, buf);
/* NOTE(review): presumably the alternative branch for any other result;
   the line joining the two calls is not visible here -- confirm.  */
513 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
514 "`%.*s' is not in NFC", (int) sz, buf);
518 /* Returns TRUE if the sequence starting at buffer->cur is valid in
519 an identifier. FIRST is TRUE if this starts an identifier. */
521 forms_identifier_p (cpp_reader *pfile, int first,
522 struct normalize_state *state)
524 cpp_buffer *buffer = pfile->buffer;
526 if (*buffer->cur == '$')
528 if (!CPP_OPTION (pfile, dollars_in_ident))
532 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
534 CPP_OPTION (pfile, warn_dollars) = 0;
535 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
541 /* Is this a syntactically valid UCN? */
542 if (CPP_OPTION (pfile, extended_identifiers)
543 && *buffer->cur == '\\'
544 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
547 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
556 /* Lex an identifier starting at BUFFER->CUR - 1. */
557 static cpp_hashnode *
558 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
559 struct normalize_state *nst)
561 cpp_hashnode *result;
564 unsigned int hash = HT_HASHSTEP (0, *base);
566 cur = pfile->buffer->cur;
568 while (ISIDNUM (*cur))
570 hash = HT_HASHSTEP (hash, *cur);
573 pfile->buffer->cur = cur;
574 if (starts_ucn || forms_identifier_p (pfile, false, nst))
576 /* Slower version for identifiers containing UCNs (or $). */
578 while (ISIDNUM (*pfile->buffer->cur))
580 pfile->buffer->cur++;
581 NORMALIZE_STATE_UPDATE_IDNUM (nst);
583 } while (forms_identifier_p (pfile, false, nst));
584 result = _cpp_interpret_identifier (pfile, base,
585 pfile->buffer->cur - base);
590 hash = HT_HASHFINISH (hash, len);
592 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
593 base, len, hash, HT_ALLOC));
596 /* Rarely, identifiers require diagnostics when lexed. */
597 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
598 && !pfile->state.skipping, 0))
600 /* It is allowed to poison the same identifier twice. */
601 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
602 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
605 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
606 replacement list of a variadic macro. */
607 if (result == pfile->spec_nodes.n__VA_ARGS__
608 && !pfile->state.va_args_ok)
609 cpp_error (pfile, CPP_DL_PEDWARN,
610 "__VA_ARGS__ can only appear in the expansion"
611 " of a C99 variadic macro");
618 /* Pedantically lex a number whose first character is at BUFFER->CUR - 1,
619 accumulating its spelling on the hash table's obstack. Place the
620 result in NUMBER. */
622 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
624 #define get_effective_char(pfile) (*pfile->buffer->cur++)
625 #define BACKUP() (--pfile->buffer->cur)
627 enum num_type_e { NT_DEC, NT_HEX, NT_BIN } num_type = NT_DEC;
628 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
630 uchar c = *(pfile->buffer->cur - 1);
631 struct obstack *stack = &pfile->hash_table->stack;
640 obstack_1grow (stack, '.');
641 c = get_effective_char (pfile);
649 obstack_1grow (stack, c);
650 c = get_effective_char (pfile);
658 obstack_1grow (stack, c);
659 c = get_effective_char (pfile);
664 if (!CPP_OPTION (pfile, std))
668 obstack_1grow (stack, c);
669 c = get_effective_char (pfile);
676 obstack_1grow (stack, c);
677 c = get_effective_char (pfile);
688 if (NT_DEC == num_type)
694 obstack_1grow (stack, c);
695 c = get_effective_char (pfile);
702 obstack_1grow (stack, c);
703 c = get_effective_char (pfile);
706 else if ('E' == c || 'e' == c)
708 if (has_whole || has_fract)
712 obstack_1grow (stack, c);
713 c = get_effective_char (pfile);
720 else if (NT_HEX == num_type)
726 obstack_1grow (stack, c);
727 c = get_effective_char (pfile);
734 obstack_1grow (stack, c);
735 c = get_effective_char (pfile);
738 else if ('P' == c || 'p' == c)
740 if (has_whole || has_fract)
744 obstack_1grow (stack, c);
745 c = get_effective_char (pfile);
752 else /* (NT_BIN == num_type) */
754 while ((c=='0') || (c=='1'))
758 obstack_1grow (stack, c);
759 c = get_effective_char (pfile);
766 obstack_1grow (stack, c);
767 c = get_effective_char (pfile);
770 else if ('P' == c || 'p' == c)
772 if (has_whole || has_fract)
776 obstack_1grow (stack, c);
777 c = get_effective_char (pfile);
784 num_part = NP_INT_SUFFIX;
788 if (NT_DEC == num_type)
794 obstack_1grow (stack, c);
795 c = get_effective_char (pfile);
798 if ('E' == c || 'e' == c)
800 if (has_whole || has_fract)
804 obstack_1grow (stack, c);
805 c = get_effective_char (pfile);
816 obstack_1grow (stack, c);
817 c = get_effective_char (pfile);
820 if ('P' == c || 'p' == c)
822 if (has_whole || has_fract)
826 obstack_1grow (stack, c);
827 c = get_effective_char (pfile);
832 num_part = NP_FLOAT_SUFFIX;
836 if ('+' == c || '-' == c)
839 obstack_1grow (stack, c);
840 c = get_effective_char (pfile);
846 obstack_1grow (stack, c);
847 c = get_effective_char (pfile);
850 num_part = NP_FLOAT_SUFFIX;
854 if ('L' == c || 'l' == c)
859 obstack_1grow (stack, c);
860 c = get_effective_char (pfile);
865 obstack_1grow (stack, c);
866 c = get_effective_char (pfile);
869 else if ('U' == c || 'u' == c)
872 obstack_1grow (stack, c);
873 c = get_effective_char (pfile);
877 case NP_FLOAT_SUFFIX:
878 if ('F' == c || 'f' == c)
881 obstack_1grow (stack, c);
882 c = get_effective_char (pfile);
884 else if ('L' == c || 'l' == c)
887 obstack_1grow (stack, c);
888 c = get_effective_char (pfile);
895 /* Step back over the unwanted char. */
898 number->text = obstack_finish (stack);
902 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
904 lex_number (cpp_reader *pfile, cpp_string *number,
905 struct normalize_state *nst)
911 base = pfile->buffer->cur - 1;
914 cur = pfile->buffer->cur;
916 /* N.B. ISIDNUM does not include $. */
917 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
920 NORMALIZE_STATE_UPDATE_IDNUM (nst);
923 pfile->buffer->cur = cur;
925 while (forms_identifier_p (pfile, false, nst));
927 number->len = cur - base;
928 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
929 memcpy (dest, base, number->len);
930 dest[number->len] = '\0';
934 /* Create a token of type TYPE with a literal spelling. */
936 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
937 unsigned int len, enum cpp_ttype type)
939 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
941 memcpy (dest, base, len);
944 token->val.str.len = len;
945 token->val.str.text = dest;
948 /* Lexes a string, character constant, or angle-bracketed header file
949 name. The stored string contains the spelling, including opening
950 quote and any leading 'L', 'u' or 'U'. It returns the type
951 of the literal, or CPP_OTHER if it was not properly terminated, or
952 CPP_LESS for an unterminated header name which must be relexed as normal tokens.
955 The spelling is NUL-terminated, but it is not guaranteed that this
956 is the first NUL since embedded NULs are preserved. */
958 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
960 bool saw_NUL = false;
962 cppchar_t terminator;
967 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
969 if (terminator == '\"')
970 type = (*base == 'L' ? CPP_WSTRING :
971 *base == 'U' ? CPP_STRING32 :
972 *base == 'u' ? CPP_STRING16 : CPP_STRING);
973 else if (terminator == '\'')
974 type = (*base == 'L' ? CPP_WCHAR :
975 *base == 'U' ? CPP_CHAR32 :
976 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
978 terminator = '>', type = CPP_HEADER_NAME;
982 cppchar_t c = *cur++;
984 /* In #include-style directives, terminators are not escapable. */
985 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
987 else if (c == terminator)
992 /* Unmatched quotes always yield undefined behavior, but
993 greedy lexing means that what appears to be an unterminated
994 header name may actually be a legitimate sequence of tokens. */
995 if (terminator == '>')
997 token->type = CPP_LESS;
1007 if (saw_NUL && !pfile->state.skipping)
1008 cpp_error (pfile, CPP_DL_WARNING,
1009 "null character(s) preserved in literal");
1011 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1012 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1015 pfile->buffer->cur = cur;
1016 create_literal (pfile, token, base, cur - base, type);
1019 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
1020 comment blocks (when executed with -C option) and
1021 _asm (SDCPP specific) blocks */
1023 /* Count and copy characters from src to dest, excluding CRs:
1024 CRs are automatically generated, because the output is
1025 opened in TEXT mode. If dest == NULL, only count chars */
1027 copy_text_chars (unsigned char *dest, const unsigned char *src, unsigned int len)
1030 const unsigned char *p;
1032 for (p = src; p != src + len; ++p)
1047 /* SDCC _asm specific */
1048 /* Store the text of an _asm block in TOKEN as a CPP_ASM token.  The
   stored spelling is the _asm keyword followed by the bytes from FROM
   up to pfile->buffer->cur, as filtered by copy_text_chars.  */
1050 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
1052 #define _ASM_STR "_asm"
/* Length of the keyword without its terminating NUL.  */
1053 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
1055 unsigned char *buffer;
1056 unsigned int text_len, len;
/* Number of raw bytes between FROM and the current lex position.  */
1058 len = pfile->buffer->cur - from;
1059 /* + _ASM_LEN for the initial '_asm'. */
/* Called with a NULL destination, copy_text_chars is used here only
   for its return value, which sizes the allocation.  */
1060 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
/* NOTE(review): the visible lines reserve no byte for a terminating
   NUL -- confirm that consumers rely on val.str.len only.  */
1061 buffer = _cpp_unaligned_alloc (pfile, text_len);
/* Record type, length and text pointer on the token; the bytes
   themselves are filled in just below.  */
1064 token->type = CPP_ASM;
1065 token->val.str.len = text_len;
1066 token->val.str.text = buffer;
/* Keyword first, then the block text immediately after it.  */
1068 memcpy (buffer, _ASM_STR, _ASM_LEN);
1069 copy_text_chars (buffer + _ASM_LEN, from, len);
1072 /* Return the comment table. The client may not make any assumption
1073 about the ordering of the table. */
1075 cpp_get_comments (cpp_reader *pfile)
1077 return &pfile->comments;
1080 /* Append a copy of the spelling of TOKEN, together with its source
   location, to the end of the comment table in PFILE.  */
1082 store_comment (cpp_reader *pfile, cpp_token *token)
/* First use: give the table an initial capacity of 256 entries.  */
1086 if (pfile->comments.allocated == 0)
1088 pfile->comments.allocated = 256;
1089 pfile->comments.entries = (cpp_comment *) xmalloc
1090 (pfile->comments.allocated * sizeof (cpp_comment));
/* Table full: double its capacity before adding the new entry.  */
1093 if (pfile->comments.count == pfile->comments.allocated)
1095 pfile->comments.allocated *= 2;
1096 pfile->comments.entries = (cpp_comment *) xrealloc
1097 (pfile->comments.entries,
1098 pfile->comments.allocated * sizeof (cpp_comment));
1101 len = token->val.str.len;
1103 /* Copy comment.  The token text may not be NUL-terminated, so LEN + 1
   bytes are allocated and the terminator is written explicitly.  */
1104 pfile->comments.entries[pfile->comments.count].comment =
1105 (char *) xmalloc (sizeof (char) * (len + 1));
1106 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1107 token->val.str.text, len);
1108 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1110 /* Set source location. */
1111 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1113 /* Increment the count of entries in the comment table. */
1114 pfile->comments.count++;
1117 /* The stored comment includes the comment start and any terminator. */
1119 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1122 unsigned char *buffer;
1123 unsigned int len, clen;
1125 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1127 /* C++ comments probably (not definitely) have moved past a new
1128 line, which we don't want to save in the comment. */
1129 if (is_vspace (pfile->buffer->cur[-1]))
1132 /* If we are currently in a directive, then we need to store all
1133 C++ comments as C comments internally, and so we need to
1134 allocate a little extra space in that case.
1136 Note that the only time we encounter a directive here is
1137 when we are saving comments in a "#define". */
1138 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1140 buffer = _cpp_unaligned_alloc (pfile, clen);
1142 token->type = CPP_COMMENT;
1143 token->val.str.len = clen;
1144 token->val.str.text = buffer;
1147 copy_text_chars (buffer + 1, from, len);
1149 /* Finish conversion to a C comment, if necessary. */
1150 if (pfile->state.in_directive && type == '/')
1153 buffer[clen - 2] = '*';
1154 buffer[clen - 1] = '/';
1157 /* Finally store this comment for use by clients of libcpp. */
1158 store_comment (pfile, token);
1161 /* Allocate COUNT tokens for RUN. */
1163 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1165 run->base = XNEWVEC (cpp_token, count);
1166 run->limit = run->base + count;
1170 /* Returns the next tokenrun, or creates one if there is none. */
/* A run created here is linked back to RUN through its prev field and
   initialized with room for 250 tokens.  */
1172 next_tokenrun (tokenrun *run)
1174 if (run->next == NULL)
1176 run->next = XNEW (tokenrun);
1177 run->next->prev = run;
1178 _cpp_init_tokenrun (run->next, 250);
1184 /* Look ahead in the input stream. */
/* INDEX selects which upcoming token to return; pending cpp_context
   objects are searched first, and new tokens are lexed only if INDEX
   lies beyond them.  NOTE(review): INDEX is presumably zero-based from
   the next unread token -- confirm.  */
1186 cpp_peek_token (cpp_reader *pfile, int index)
1188 cpp_context *context = pfile->context;
1189 const cpp_token *peektok;
1192 /* First, scan through any pending cpp_context objects. */
/* Only contexts that have a predecessor are examined; the walk stops at
   the context whose prev is null.  */
1193 while (context->prev)
/* Distance between the FIRST and LAST cursors of this context, taken
   over tokens or over token pointers depending on direct_p.  */
1195 ptrdiff_t sz = (context->direct_p
1196 ? LAST (context).token - FIRST (context).token
1197 : LAST (context).ptoken - FIRST (context).ptoken);
/* INDEX falls inside this context: return that token directly.  */
1199 if (index < (int) sz)
1200 return (context->direct_p
1201 ? FIRST (context).token + index
1202 : *(FIRST (context).ptoken + index));
/* NOTE(review): how INDEX is adjusted before moving to the previous
   context is not visible here -- confirm.  */
1205 context = context->prev;
1208 /* We will have to read some new tokens after all (and do so
1209 without invalidating preceding tokens). */
/* While keep_tokens is nonzero, _cpp_lex_direct does not rewind
   cur_token to the start of the base run when it fetches a fresh line.  */
1211 pfile->keep_tokens++;
1215 peektok = _cpp_lex_token (pfile);
/* NOTE(review): the loop around this lex and the action taken on
   CPP_EOF are not fully visible here -- confirm.  */
1216 if (peektok->type == CPP_EOF)
/* Presumably hands back every token read by the peek so that nothing is
   consumed; COUNT is maintained in lines not visible here.  */
1221 _cpp_backup_tokens_direct (pfile, count + 1);
1222 pfile->keep_tokens--;
1227 /* Allocate a single token that is invalidated at the same time as the
1228 rest of the tokens on the line. Has its line and col set to the
1229 same as the last lexed token, so that diagnostics appear in the right place. */
1232 _cpp_temp_token (cpp_reader *pfile)
1234 cpp_token *old, *result;
1235 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1236 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1238 old = pfile->cur_token - 1;
1239 /* Any pre-existing lookaheads must not be clobbered. */
1244 tokenrun *next = next_tokenrun (pfile->cur_run);
1247 memmove (next->base + 1, next->base,
1248 (la - sz) * sizeof (cpp_token));
1250 next->base[0] = pfile->cur_run->limit[-1];
1254 memmove (pfile->cur_token + 1, pfile->cur_token,
1255 MIN (la, sz - 1) * sizeof (cpp_token));
1258 if (!sz && pfile->cur_token == pfile->cur_run->limit)
1260 pfile->cur_run = next_tokenrun (pfile->cur_run);
1261 pfile->cur_token = pfile->cur_run->base;
1264 result = pfile->cur_token++;
1265 result->src_loc = old->src_loc;
1269 /* Lex a token into RESULT (external interface). Takes care of issues
1270 like directive handling, token lookahead, multiple include
1271 optimization and skipping. */
1273 _cpp_lex_token (cpp_reader *pfile)
1279 if (pfile->cur_token == pfile->cur_run->limit)
1281 pfile->cur_run = next_tokenrun (pfile->cur_run);
1282 pfile->cur_token = pfile->cur_run->base;
1284 /* We assume that the current token is somewhere in the current run. */
1286 if (pfile->cur_token < pfile->cur_run->base
1287 || pfile->cur_token >= pfile->cur_run->limit)
1290 if (pfile->lookaheads)
1292 pfile->lookaheads--;
1293 result = pfile->cur_token++;
1296 result = _cpp_lex_direct (pfile);
1298 if (result->flags & BOL)
1300 /* Is this a directive?  If _cpp_handle_directive returns
1301 false, it is an assembler #. */
1302 if (result->type == CPP_HASH
1303 /* 6.10.3 p 11: Directives in a list of macro arguments
1304 gives undefined behavior. This implementation
1305 handles the directive as normal. */
1306 && pfile->state.parsing_args != 1)
1308 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1310 if (pfile->directive_result.type == CPP_PADDING)
1312 result = &pfile->directive_result;
1315 else if (pfile->state.in_deferred_pragma)
1316 result = &pfile->directive_result;
1318 if (pfile->cb.line_change && !pfile->state.skipping)
1319 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1322 /* We don't skip tokens in directives. */
1323 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1326 /* Outside a directive, invalidate controlling macros. At file
1327 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1328 get here and MI optimization works. */
1329 pfile->mi_valid = false;
1331 if (!pfile->state.skipping || result->type == CPP_EOF)
1338 /* Returns true if a fresh line has been loaded. */
1340 _cpp_get_fresh_line (cpp_reader *pfile)
1344 /* We can't get a new line until we leave the current directive. */
1345 if (pfile->state.in_directive)
1350 cpp_buffer *buffer = pfile->buffer;
1352 if (!buffer->need_line)
1355 if (buffer->next_line < buffer->rlimit)
1357 _cpp_clean_line (pfile);
1361 /* First, get out of parsing arguments state. */
1362 if (pfile->state.parsing_args)
1365 /* End of buffer. Non-empty files should end in a newline. */
1366 if (buffer->buf != buffer->rlimit
1367 && buffer->next_line > buffer->rlimit
1368 && !buffer->from_stage3)
1370 /* Clip to buffer size. */
1371 buffer->next_line = buffer->rlimit;
1374 return_at_eof = buffer->return_at_eof;
1375 _cpp_pop_buffer (pfile);
1376 if (pfile->buffer == NULL || return_at_eof)
1381 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1384 result->type = ELSE_TYPE; \
1385 if (*buffer->cur == CHAR) \
1386 buffer->cur++, result->type = THEN_TYPE; \
1390 /* Lex a token into pfile->cur_token, which is also incremented, to
1391 get diagnostics pointing to the correct location.
1393 Does not handle issues such as token lookahead, multiple-include
1394 optimization, directives, skipping etc. This function is only
1395 suitable for use by _cpp_lex_token, and in special cases like
1396 lex_expansion_token which doesn't care for any of these issues.
1398 When meeting a newline, returns CPP_EOF if parsing a directive,
1399 otherwise returns to the start of the token buffer if permissible.
1400 Returns the location of the lexed token. */
1402 _cpp_lex_direct (cpp_reader *pfile)
1406 const unsigned char *comment_start;
1407 cpp_token *result = pfile->cur_token++;
1411 buffer = pfile->buffer;
1412 if (buffer->need_line)
1414 if (pfile->state.in_deferred_pragma)
1416 result->type = CPP_PRAGMA_EOL;
1417 pfile->state.in_deferred_pragma = false;
1418 if (!pfile->state.pragma_allow_expansion)
1419 pfile->state.prevent_expansion--;
1422 if (!_cpp_get_fresh_line (pfile))
1424 result->type = CPP_EOF;
1425 if (!pfile->state.in_directive)
1427 /* Tell the compiler the line number of the EOF token. */
1428 result->src_loc = pfile->line_table->highest_line;
1429 result->flags = BOL;
1433 if (!pfile->keep_tokens)
1435 pfile->cur_run = &pfile->base_run;
1436 result = pfile->base_run.base;
1437 pfile->cur_token = result + 1;
1439 result->flags = BOL;
1440 if (pfile->state.parsing_args == 2)
1441 result->flags |= PREV_WHITE;
1443 buffer = pfile->buffer;
1445 result->src_loc = pfile->line_table->highest_line;
1448 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1449 && !pfile->overlaid_buffer)
1451 _cpp_process_line_notes (pfile, false);
1452 result->src_loc = pfile->line_table->highest_line;
1456 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1457 CPP_BUF_COLUMN (buffer, buffer->cur));
1461 case ' ': case '\t': case '\f': case '\v': case '\0':
1462 result->flags |= PREV_WHITE;
1463 skip_whitespace (pfile, c);
1467 if (buffer->cur < buffer->rlimit)
1468 CPP_INCREMENT_LINE (pfile, 0);
1469 buffer->need_line = true;
1472 case '0': case '1': case '2': case '3': case '4':
1473 case '5': case '6': case '7': case '8': case '9':
1475 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1476 result->type = CPP_NUMBER;
1477 if (CPP_OPTION (pfile, pedantic_parse_number))
1478 pedantic_lex_number (pfile, &result->val.str);
1480 lex_number (pfile, &result->val.str, &nst);
1481 warn_about_normalization (pfile, result, &nst);
1488 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1489 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1491 if (*buffer->cur == '\'' || *buffer->cur == '"')
1493 lex_string (pfile, result, buffer->cur - 1);
1500 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1501 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1502 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1503 case 's': case 't': case 'v': case 'w': case 'x':
1505 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1506 case 'G': case 'H': case 'I': case 'J': case 'K':
1507 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1508 case 'S': case 'T': case 'V': case 'W': case 'X':
1510 result->type = CPP_NAME;
1512 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1513 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1515 warn_about_normalization (pfile, result, &nst);
1518 /* SDCC _asm specific */
1519 /* handle _asm ... _endasm ; */
1520 if (CPP_OPTION (pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1522 comment_start = buffer->cur;
1523 result->type = CPP_ASM;
1524 skip_asm_block (pfile);
1525 /* Save the _asm block as a token in its own right. */
1526 save_asm (pfile, result, comment_start);
1528 /* Convert named operators to their proper types. */
1529 else if (result->val.node->flags & NODE_OPERATOR)
1531 result->flags |= NAMED_OP;
1532 result->type = (enum cpp_ttype) result->val.node->directive_index;
1538 lex_string (pfile, result, buffer->cur - 1);
1542 /* A potential block or line comment. */
1543 comment_start = buffer->cur;
1548 if (_cpp_skip_block_comment (pfile))
1549 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1551 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1552 || cpp_in_system_header (pfile)))
1554 /* Warn about comments only if pedantically GNUC89, and not
1555 in system headers. */
1556 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1557 && ! buffer->warned_cplusplus_comments)
1559 cpp_error (pfile, CPP_DL_PEDWARN,
1560 "C++ style comments are not allowed in ISO C90");
1561 cpp_error (pfile, CPP_DL_PEDWARN,
1562 "(this will be reported only once per input file)");
1563 buffer->warned_cplusplus_comments = 1;
1566 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1567 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1572 result->type = CPP_DIV_EQ;
1577 result->type = CPP_DIV;
1581 if (!pfile->state.save_comments)
1583 result->flags |= PREV_WHITE;
1584 goto update_tokens_line;
1587 /* Save the comment as a token in its own right. */
1588 save_comment (pfile, result, comment_start, c);
1592 if (pfile->state.angled_headers)
1594 lex_string (pfile, result, buffer->cur - 1);
1595 if (result->type != CPP_LESS)
1599 result->type = CPP_LESS;
1600 if (*buffer->cur == '=')
1601 buffer->cur++, result->type = CPP_LESS_EQ;
1602 else if (*buffer->cur == '<')
1605 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1607 else if (CPP_OPTION (pfile, digraphs))
1609 if (*buffer->cur == ':')
1612 result->flags |= DIGRAPH;
1613 result->type = CPP_OPEN_SQUARE;
1615 else if (*buffer->cur == '%')
1618 result->flags |= DIGRAPH;
1619 result->type = CPP_OPEN_BRACE;
1625 result->type = CPP_GREATER;
1626 if (*buffer->cur == '=')
1627 buffer->cur++, result->type = CPP_GREATER_EQ;
1628 else if (*buffer->cur == '>')
1631 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1636 result->type = CPP_MOD;
1637 if (*buffer->cur == '=')
1638 buffer->cur++, result->type = CPP_MOD_EQ;
1639 else if (CPP_OPTION (pfile, digraphs))
1641 if (*buffer->cur == ':')
1644 result->flags |= DIGRAPH;
1645 result->type = CPP_HASH;
1646 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1647 buffer->cur += 2, result->type = CPP_PASTE;
1649 else if (*buffer->cur == '>')
1652 result->flags |= DIGRAPH;
1653 result->type = CPP_CLOSE_BRACE;
1659 result->type = CPP_DOT;
1660 if (ISDIGIT (*buffer->cur))
1662 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1663 result->type = CPP_NUMBER;
1664 if (CPP_OPTION (pfile, pedantic_parse_number))
1665 pedantic_lex_number (pfile, &result->val.str);
1667 lex_number (pfile, &result->val.str, &nst);
1668 warn_about_normalization (pfile, result, &nst);
1670 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1671 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1672 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1673 buffer->cur++, result->type = CPP_DOT_STAR;
1677 result->type = CPP_PLUS;
1678 if (*buffer->cur == '+')
1679 buffer->cur++, result->type = CPP_PLUS_PLUS;
1680 else if (*buffer->cur == '=')
1681 buffer->cur++, result->type = CPP_PLUS_EQ;
1685 result->type = CPP_MINUS;
1686 if (*buffer->cur == '>')
1689 result->type = CPP_DEREF;
1690 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1691 buffer->cur++, result->type = CPP_DEREF_STAR;
1693 else if (*buffer->cur == '-')
1694 buffer->cur++, result->type = CPP_MINUS_MINUS;
1695 else if (*buffer->cur == '=')
1696 buffer->cur++, result->type = CPP_MINUS_EQ;
1700 result->type = CPP_AND;
1701 if (*buffer->cur == '&')
1702 buffer->cur++, result->type = CPP_AND_AND;
1703 else if (*buffer->cur == '=')
1704 buffer->cur++, result->type = CPP_AND_EQ;
1708 result->type = CPP_OR;
1709 if (*buffer->cur == '|')
1710 buffer->cur++, result->type = CPP_OR_OR;
1711 else if (*buffer->cur == '=')
1712 buffer->cur++, result->type = CPP_OR_EQ;
1716 result->type = CPP_COLON;
1717 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1718 buffer->cur++, result->type = CPP_SCOPE;
1719 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1722 result->flags |= DIGRAPH;
1723 result->type = CPP_CLOSE_SQUARE;
1727 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1728 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1729 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1730 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1731 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1733 case '?': result->type = CPP_QUERY; break;
1734 case '~': result->type = CPP_COMPL; break;
1735 case ',': result->type = CPP_COMMA; break;
1736 case '(': result->type = CPP_OPEN_PAREN; break;
1737 case ')': result->type = CPP_CLOSE_PAREN; break;
1738 case '[': result->type = CPP_OPEN_SQUARE; break;
1739 case ']': result->type = CPP_CLOSE_SQUARE; break;
1740 case '{': result->type = CPP_OPEN_BRACE; break;
1741 case '}': result->type = CPP_CLOSE_BRACE; break;
1742 case ';': result->type = CPP_SEMICOLON; break;
1744 /* @ is a punctuator in Objective-C. */
1745 case '@': result->type = CPP_ATSIGN; break;
1750 const uchar *base = --buffer->cur;
1751 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1753 if (forms_identifier_p (pfile, true, &nst))
1755 result->type = CPP_NAME;
1756 result->val.node = lex_identifier (pfile, base, true, &nst);
1757 warn_about_normalization (pfile, result, &nst);
1764 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1771 /* An upper bound on the number of bytes needed to spell TOKEN.
1772 Does not include preceding whitespace. */
/* The bound depends on the spelling category reported by TOKEN_SPELL:
   the stored string length for literals, ten bytes per name byte for
   identifiers, and a flat six bytes for every other category. */
1774 cpp_token_len (const cpp_token *token)
1778 switch (TOKEN_SPELL (token))
1780 default: len = 6; break;
1781 case SPELL_LITERAL: len = token->val.str.len; break;
/* NOTE(review): the factor of ten presumably allows for name bytes that
   are spelled as ten-byte UCN escapes by utf8_to_ucn - confirm. */
1782 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1788 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
1789 Return the number of bytes read out of NAME. (There are always
1790 10 bytes written to BUFFER.) */
1793 utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
1799 unsigned long utf32;
1801 /* Compute the length of the UTF-8 sequence. */
/* The loop runs once for every leading set bit of the first byte. */
1802 for (t = *name; t & 0x80; t <<= 1)
/* Keep only the payload bits of the lead byte: the mask shrinks by one
   bit for every length-marker bit counted above. */
1805 utf32 = *name & (0x7F >> ucn_len);
1806 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
/* Each following byte contributes its low six bits. */
1808 utf32 = (utf32 << 6) | (*++name & 0x3F);
1810 /* Ill-formed UTF-8. */
/* A well-formed continuation byte has the bit pattern 10xxxxxx. */
1811 if ((*name & ~0x3F) != 0x80)
/* Emit the code point as eight lowercase hex digits, most significant
   nibble first. */
1817 for (j = 7; j >= 0; j--)
1818 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
1823 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1824 already contain enough space to hold the token's spelling.
1825 Returns a pointer to the character after the last character written.
1826 FORSTRING is true if this is to be the spelling after translation
1827 phase 1 (this is different for UCNs).
1828 FIXME: Would be nice if we didn't need the PFILE argument. */
/* In the code visible here PFILE is only passed to cpp_error for the
   unspellable-token diagnostic. */
1830 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1831 unsigned char *buffer, bool forstring)
1833 switch (TOKEN_SPELL (token))
1835 case SPELL_OPERATOR:
1837 const unsigned char *spelling;
/* Digraph tokens take their text from digraph_spellings, indexed by the
   distance of the token type from CPP_FIRST_DIGRAPH. */
1840 if (token->flags & DIGRAPH)
1842 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1843 else if (token->flags & NAMED_OP)
1846 spelling = TOKEN_NAME (token);
1848 while ((c = *spelling++) != '\0')
/* Copy the identifier bytes unchanged and advance BUFFER past them. */
1857 memcpy (buffer, NODE_NAME (token->val.node),
1858 NODE_LEN (token->val.node));
1859 buffer += NODE_LEN (token->val.node);
1864 const unsigned char * name = NODE_NAME (token->val.node);
/* Walk the name byte by byte; a byte with the high bit set starts a
   UTF-8 sequence that utf8_to_ucn converts to an escape. */
1866 for (i = 0; i < NODE_LEN (token->val.node); i++)
1867 if (name[i] & ~0x7F)
/* The - 1 compensates for the increment done by the loop itself. */
1869 i += utf8_to_ucn (buffer, name + i) - 1;
1873 *buffer++ = NODE_NAME (token->val.node)[i];
/* Literal text is copied verbatim from the stored string. */
1878 memcpy (buffer, token->val.str.text, token->val.str.len);
1879 buffer += token->val.str.len;
/* Reported as an internal compiler error (CPP_DL_ICE). */
1883 cpp_error (pfile, CPP_DL_ICE,
1884 "unspellable token %s", TOKEN_NAME (token));
1891 /* Returns TOKEN spelt as a null-terminated string. The string is
1892 freed when the reader is destroyed. Useful for diagnostics. */
1894 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
/* One byte more than the upper bound from cpp_token_len; presumably the
   extra byte holds the terminator - confirm. */
1896 unsigned int len = cpp_token_len (token) + 1;
/* Storage comes from _cpp_unaligned_alloc, not from malloc. */
1897 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* FORSTRING is false here; END is the pointer one past the last byte
   written by cpp_spell_token. */
1899 end = cpp_spell_token (pfile, token, start, false);
1905 /* Used by C front ends, which really should move to using
1906 cpp_token_as_text. */
/* Returns the name field of the token_spellings entry for TYPE. TYPE is
   used as an array index without a range check, so it must be below
   N_TTYPES, the declared size of that table. */
1908 cpp_type2name (enum cpp_ttype type)
1910 return (const char *) token_spellings[type].name;
1913 /* Writes the spelling of token to FP, without any preceding space.
1914 Separated from cpp_spell_token for efficiency - to avoid stdio
1915 double-buffering. */
/* The results of fwrite and fputc are not checked in the code visible
   here. */
1917 cpp_output_token (const cpp_token *token, FILE *fp)
1919 switch (TOKEN_SPELL (token))
1921 case SPELL_OPERATOR:
1923 const unsigned char *spelling;
/* Digraph tokens take their text from digraph_spellings, indexed by the
   distance of the token type from CPP_FIRST_DIGRAPH. */
1926 if (token->flags & DIGRAPH)
1928 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1929 else if (token->flags & NAMED_OP)
1932 spelling = TOKEN_NAME (token);
/* NOTE(review): the pre-increment form suggests this is the closing test
   of a do/while that has already handled the first character - verify. */
1937 while ((c = *++spelling) != '\0');
1945 const unsigned char * name = NODE_NAME (token->val.node);
/* Walk the name byte by byte; a byte with the high bit set starts a
   UTF-8 sequence that is written out as an escape. */
1947 for (i = 0; i < NODE_LEN (token->val.node); i++)
1948 if (name[i] & ~0x7F)
/* Ten bytes matches the fixed output size documented for utf8_to_ucn. */
1950 unsigned char buffer[10];
/* The - 1 compensates for the increment done by the loop itself. */
1951 i += utf8_to_ucn (buffer, name + i) - 1;
1952 fwrite (buffer, 1, 10, fp);
1955 fputc (NODE_NAME (token->val.node)[i], fp);
/* Literal text is written verbatim from the stored string. */
1960 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1964 /* An error, most probably. */
1969 /* Compare two tokens. */
/* Tokens are only compared further when both type and flags are equal;
   the remaining test depends on the spelling category of A. */
1971 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1973 if (a->type == b->type && a->flags == b->flags)
1974 switch (TOKEN_SPELL (a))
1976 default: /* Keep compiler happy. */
1977 case SPELL_OPERATOR:
/* Macro arguments must also agree on the argument number; any other
   token reaching this point compares equal. */
1980 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Nodes are compared by pointer identity, not by name text. */
1982 return a->val.node == b->val.node;
/* Strings are equal when the lengths match and the bytes compare equal. */
1984 return (a->val.str.len == b->val.str.len
1985 && !memcmp (a->val.str.text, b->val.str.text,
1992 /* Returns nonzero if a space should be inserted to avoid an
1993 accidental token paste for output. For simplicity, it is
1994 conservative, and occasionally advises a space where one is not
1995 needed, e.g. "." and ".2". */
/* TOKEN1 is the token already output and TOKEN2 the one about to follow
   it. A and B are their types; C is the first character of the spelling
   of TOKEN2 when that token is a digraph or an operator. */
1997 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1998 const cpp_token *token2)
2000 enum cpp_ttype a = token1->type, b = token2->type;
2003 if (token1->flags & NAMED_OP)
2005 if (token2->flags & NAMED_OP)
/* A digraph is judged by the first character of its digraph spelling,
   not by the spelling of the token type it stands for. */
2009 if (token2->flags & DIGRAPH)
2010 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2011 else if (token_spellings[b].category == SPELL_OPERATOR)
2012 c = token_spellings[b].name[0];
2014 /* Quickly get everything that can paste with an '='. */
/* The comparison against CPP_LAST_EQ relies on the ordering of the
   cpp_ttype enumerators - presumably all types that can take a trailing
   equals sign come first; verify against the enum definition. */
2015 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below names the type of TOKEN1 and tests whether C, or the
   type of TOKEN2, could combine with it into a different token. */
2020 case CPP_GREATER: return c == '>';
2021 case CPP_LESS: return c == '<' || c == '%' || c == ':';
2022 case CPP_PLUS: return c == '+';
2023 case CPP_MINUS: return c == '-' || c == '>';
2024 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
2025 case CPP_MOD: return c == ':' || c == '>';
2026 case CPP_AND: return c == '&';
2027 case CPP_OR: return c == '|';
2028 case CPP_COLON: return c == ':' || c == '>';
2029 case CPP_DEREF: return c == '*';
2030 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
2031 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
2032 case CPP_NAME: return ((b == CPP_NUMBER
2033 && name_p (pfile, &token2->val.str))
2035 || b == CPP_CHAR || b == CPP_STRING); /* L */
2036 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
2037 || c == '.' || c == '+' || c == '-');
/* For CPP_OTHER the decision looks at the first byte of the stored text
   of TOKEN1. */
2039 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2041 || (CPP_OPTION (pfile, objc)
2042 && token1->val.str.text[0] == '@'
2043 && (b == CPP_NAME || b == CPP_STRING)));
2050 /* Output all the remaining tokens on the current line, and a newline
2051 character, to FP. Leading whitespace is removed. If there are
2052 macros, special token padding is not performed. */
2054 cpp_output_line (cpp_reader *pfile, FILE *fp)
2056 const cpp_token *token;
/* Tokens are pulled from cpp_get_token until it yields CPP_EOF. */
2058 token = cpp_get_token (pfile);
2059 while (token->type != CPP_EOF)
2061 cpp_output_token (token, fp);
/* The PREV_WHITE flag is read from the token fetched after the one just
   written, so it governs the separator between two tokens; the flag of
   the very first token is not consulted in the code visible here. */
2062 token = cpp_get_token (pfile);
2063 if (token->flags & PREV_WHITE)
2070 /* Return a string representation of all the remaining tokens on the
2071 current line. The result is allocated using xmalloc and must be
2072 freed by the caller. */
/* DIR_NAME may be a null pointer; when it is supplied the output starts
   with a hash sign, the name and one space. */
2074 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2076 const cpp_token *token;
/* OUT is the write offset into RESULT; ALLOCED is the current capacity,
   starting at 120 bytes beyond the length of the name. */
2077 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2078 unsigned int alloced = 120 + out;
2079 unsigned char *result = (unsigned char *) xmalloc (alloced);
2081 /* If DIR_NAME is empty, there are no initial contents. */
/* The formatted prefix needs the name length plus three bytes including
   the terminator, which fits in the initial capacity. */
2084 sprintf ((char *) result, "#%s ", dir_name);
2088 token = cpp_get_token (pfile);
2089 while (token->type != CPP_EOF)
2091 unsigned char *last;
2092 /* Include room for a possible space and the terminating nul. */
2093 unsigned int len = cpp_token_len (token) + 2;
/* Grow RESULT before spelling when OUT + LEN would exceed ALLOCED; the
   new capacity is never left below OUT + LEN. */
2095 if (out + len > alloced)
2098 if (out + len > alloced)
2099 alloced = out + len;
2100 result = (unsigned char *) xrealloc (result, alloced);
/* The token is spelled at offset OUT; OUT is then recomputed from the
   end pointer returned by cpp_spell_token. */
2103 last = cpp_spell_token (pfile, token, &result[out], 0);
2104 out = last - result;
/* A single space is appended when the next token carries PREV_WHITE. */
2106 token = cpp_get_token (pfile);
2107 if (token->flags & PREV_WHITE)
2108 result[out++] = ' ';
2115 /* Memory buffers. Changing these three constants can have a dramatic
2116 effect on performance. The values here are reasonable defaults,
2117 but might be tuned. If you adjust them, be sure to test across a
2118 range of uses of cpplib, including heavy nested function-like macro
2119 expansion. Also check the change in peak memory usage (NJAMD is a
2120 good tool for this). */
/* MIN_BUFF_SIZE: new_buff raises any smaller request to this size. */
2121 #define MIN_BUFF_SIZE 8000
/* BUFF_SIZE_UPPER_BOUND: the largest free buffer that _cpp_get_buff will
   hand out for a request of MIN_SIZE bytes. */
2122 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* EXTENDED_BUFF_SIZE: MIN_EXTRA plus twice the number of bytes between
   the cur and limit pointers of BUFF.
   NOTE(review): MIN_EXTRA is expanded without parentheses; the two uses
   in this file pass a plain identifier, so nothing breaks today. */
2123 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2124 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time guard: the upper bound for a zero-byte request must not
   fall below MIN_BUFF_SIZE. */
2126 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2127 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2130 /* Create a new allocation buffer. Place the control block at the end
2131 of the buffer, so that buffer overflows will cause immediate chaos. */
/* LEN is the requested number of data bytes. A single allocation holds
   the data area followed by the _cpp_buff control block. */
2133 new_buff (size_t len)
2136 unsigned char *base;
/* Never create a buffer smaller than MIN_BUFF_SIZE. */
2138 if (len < MIN_BUFF_SIZE)
2139 len = MIN_BUFF_SIZE;
/* Presumably rounds LEN up so that the control block placed at
   base + len is suitably aligned - confirm against CPP_ALIGN. */
2140 len = CPP_ALIGN (len);
2142 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
/* The control block starts right after the data area. */
2143 result = (_cpp_buff *) (base + len);
2144 result->base = base;
/* LIMIT marks the end of the data area, which is also the address of
   the control block itself. */
2146 result->limit = base + len;
2147 result->next = NULL;
2151 /* Place a chain of unwanted allocation buffers on the free list. */
/* BUFF is the head of the chain being released; it becomes the new head
   of pfile->free_buffs. */
2153 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
/* END starts at the head of the chain; presumably it is advanced to the
   last buffer before the splice below - verify. */
2155 _cpp_buff *end = buff;
/* Splice: the existing free list is attached after END, then the free
   list head is pointed at BUFF. */
2159 end->next = pfile->free_buffs;
2160 pfile->free_buffs = buff;
2163 /* Return a free buffer of size at least MIN_SIZE. */
/* The free list at pfile->free_buffs is walked through P, a pointer to
   the link being examined. A listed buffer is accepted when its capacity
   (limit - base) is at least MIN_SIZE and at most
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE); a fresh buffer is obtained from
   new_buff otherwise (presumably when the list is exhausted - verify).
   The chosen buffer has its next link cleared and cur reset to base. */
2165 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2167 _cpp_buff *result, **p;
2169 for (p = &pfile->free_buffs;; p = &(*p)->next)
2174 return new_buff (min_size);
2176 size = result->limit - result->base;
2177 /* Return a buffer that's big enough, but don't waste one that's
2179 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2184 result->next = NULL;
2185 result->cur = result->base;
2189 /* Creates a new buffer with enough space to hold the uncommitted
2190 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2191 the excess bytes to the new buffer. Chains the new buffer after
2192 BUFF, and returns the new buffer. */
2194 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
/* The requested size is MIN_EXTRA plus twice the bytes between cur and
   limit of BUFF (see EXTENDED_BUFF_SIZE). */
2196 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
/* Note that this local variable shadows the function named new_buff. */
2197 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2199 buff->next = new_buff;
/* Copy starts at the cur pointer of BUFF; the byte count comes from
   BUFF_ROOM, presumably limit - cur - confirm against its definition. */
2200 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2204 /* Creates a new buffer with enough space to hold the uncommitted
2205 remaining bytes of the buffer pointed to by BUFF, and at least
2206 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2207 Chains the new buffer before the buffer pointed to by BUFF, and
2208 updates the pointer to point to the new buffer. */
2210 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
/* OLD_BUFF is the buffer currently referenced through PBUFF. The local
   new_buff shadows the function of the same name. */
2212 _cpp_buff *new_buff, *old_buff = *pbuff;
/* The requested size is MIN_EXTRA plus twice the bytes between cur and
   limit of the old buffer (see EXTENDED_BUFF_SIZE). */
2213 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2215 new_buff = _cpp_get_buff (pfile, size);
/* Copy starts at the cur pointer of the old buffer; the byte count comes
   from BUFF_ROOM, presumably limit - cur - confirm. */
2216 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
/* The old buffer stays reachable as the successor of the new one. */
2217 new_buff->next = old_buff;
2221 /* Free a chain of buffers starting at BUFF. */
/* A null BUFF makes the loop a no-op. */
2223 _cpp_free_buff (_cpp_buff *buff)
/* The loop advances through NEXT rather than buff->next; presumably NEXT
   is saved before each buffer is released, because new_buff places the
   control block inside the same allocation as the data - verify. */
2227 for (; buff; buff = next)
2234 /* Allocate permanent, unaligned storage of length LEN. */
/* Storage is carved from the buffer at the head of pfile->u_buff by
   advancing its cur pointer. */
2236 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2238 _cpp_buff *buff = pfile->u_buff;
2239 unsigned char *result = buff->cur;
/* Not enough room left between cur and limit: fetch another buffer and
   make it the new head of the u_buff chain, keeping the old chain
   reachable through its next link. */
2241 if (len > (size_t) (buff->limit - result))
2243 buff = _cpp_get_buff (pfile, len);
2244 buff->next = pfile->u_buff;
2245 pfile->u_buff = buff;
/* Commit LEN bytes starting at RESULT. */
2249 buff->cur = result + len;
2253 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2254 That buffer is used for growing allocations when saving macro
2255 replacement lists in a #define, and when parsing an answer to an
2256 assertion in #assert, #unassert or #if (and therefore possibly
2257 whilst expanding macros). It therefore must not be used by any
2258 code that they might call: specifically the lexer and the guts of
2261 All existing other uses clearly fit this restriction: storing
2262 registered pragmas during initialization. */
/* NOTE(review): the function name says aligned while the comment above
   says unaligned, and no alignment step is visible in the code here;
   confirm which is intended. */
2264 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2266 _cpp_buff *buff = pfile->a_buff;
2267 unsigned char *result = buff->cur;
/* Not enough room left between cur and limit: fetch another buffer and
   make it the new head of the a_buff chain, keeping the old chain
   reachable through its next link. */
2269 if (len > (size_t) (buff->limit - result))
2271 buff = _cpp_get_buff (pfile, len);
2272 buff->next = pfile->a_buff;
2273 pfile->a_buff = buff;
/* Commit LEN bytes starting at RESULT. */
2277 buff->cur = result + len;
2281 /* Say which field of TOK is in use. */
/* The answer is a cpp_token_fld_kind value chosen from the spelling
   category of TOK and, for some tokens, from the token type as well. */
2283 enum cpp_token_fld_kind
2284 cpp_token_val_index (cpp_token *tok)
2286 switch (TOKEN_SPELL (tok))
2289 return CPP_TOKEN_FLD_NODE;
2291 return CPP_TOKEN_FLD_STR;
/* These three token types are singled out by type and each maps to its
   own field kind. */
2293 if (tok->type == CPP_MACRO_ARG)
2294 return CPP_TOKEN_FLD_ARG_NO;
2295 else if (tok->type == CPP_PADDING)
2296 return CPP_TOKEN_FLD_SOURCE;
2297 else if (tok->type == CPP_PRAGMA)
2298 return CPP_TOKEN_FLD_PRAGMA;
2299 /* else fall through */
/* Any token not matched above reports that no field is in use. */
2301 return CPP_TOKEN_FLD_NONE;