1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
38 enum spell_type category;
39 const unsigned char *name;
42 static const unsigned char *const digraph_spellings[] =
43 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, U s },
46 #define TK(e, s) { SPELL_ ## s, U #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
101 _cpp_clean_line (cpp_reader *pfile)
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 const uchar *pbackslash = NULL;
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
123 if (__builtin_expect (c == '\n', false)
124 || __builtin_expect (c == '\r', false))
128 if (__builtin_expect (s == buffer->rlimit, false))
131 /* DOS line ending? */
132 if (__builtin_expect (c == '\r', false)
136 if (s == buffer->rlimit)
140 if (__builtin_expect (pbackslash == NULL, true))
143 /* Check for escaped newline. */
145 while (is_nvspace (p[-1]))
147 if (p - 1 != pbackslash)
150 /* Have an escaped newline; process it and proceed to
152 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 buffer->next_line = p - 1;
157 if (__builtin_expect (c == '\\', false))
159 else if (__builtin_expect (c == '?', false)
160 && __builtin_expect (s[1] == '?', false)
161 && _cpp_trigraph_map[s[2]])
163 /* Have a trigraph. We may or may not have to convert
164 it. Add a line note regardless, for -Wtrigraphs. */
165 add_line_note (buffer, s, s[2]);
166 if (CPP_OPTION (pfile, trigraphs))
168 /* We do, and that means we have to switch to the
171 *d = _cpp_trigraph_map[s[2]];
184 if (c == '\n' || c == '\r')
186 /* Handle DOS line endings. */
187 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 if (s == buffer->rlimit)
194 while (p != buffer->next_line && is_nvspace (p[-1]))
196 if (p == buffer->next_line || p[-1] != '\\')
199 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
201 buffer->next_line = p - 1;
203 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 /* Add a note regardless, for the benefit of -Wtrigraphs. */
206 add_line_note (buffer, d, s[2]);
207 if (CPP_OPTION (pfile, trigraphs))
209 *d = _cpp_trigraph_map[s[2]];
219 while (*s != '\n' && *s != '\r');
222 /* Handle DOS line endings. */
223 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
229 /* A sentinel note that should never be processed. */
230 add_line_note (buffer, d + 1, '\n');
231 buffer->next_line = s + 1;
234 /* Return true if the trigraph indicated by NOTE should be warned
235 about in a comment. */
237 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
241 /* Within comments we don't warn about trigraphs, unless the
242 trigraph forms an escaped newline, as that may change
244 if (note->type != '/')
247 /* If -trigraphs, then this was an escaped newline iff the next note
249 if (CPP_OPTION (pfile, trigraphs))
250 return note[1].pos == note->pos;
252 /* Otherwise, see if this forms an escaped newline. */
254 while (is_nvspace (*p))
257 /* There might have been escaped newlines between the trigraph and the
258 newline we found. Hence the position test. */
259 return (*p == '\n' && p < note[1].pos);
262 /* Process the notes created by add_line_note as far as the current
265 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267 cpp_buffer *buffer = pfile->buffer;
271 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
274 if (note->pos > buffer->cur)
278 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280 if (note->type == '\\' || note->type == ' ')
282 if (note->type == ' ' && !in_comment)
283 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
284 "backslash and newline separated by space");
286 if (buffer->next_line > buffer->rlimit)
288 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
289 "backslash-newline at end of file");
290 /* Prevent "no newline at end of file" warning. */
291 buffer->next_line = buffer->rlimit;
294 buffer->line_base = note->pos;
295 CPP_INCREMENT_LINE (pfile, 0);
297 else if (_cpp_trigraph_map[note->type])
299 if (CPP_OPTION (pfile, warn_trigraphs)
300 && (!in_comment || warn_in_comment (pfile, note)))
302 if (CPP_OPTION (pfile, trigraphs))
303 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
304 "trigraph ??%c converted to %c",
306 (int) _cpp_trigraph_map[note->type]);
310 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
311 "trigraph ??%c ignored, use -trigraphs to enable",
321 /* SDCC _asm specific */
322 /* Skip an _asm ... _endasm block. We find the end of the block by
323 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
326 skip_asm_block (cpp_reader *pfile)
328 #define _ENDASM_STR "endasm"
329 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
331 cpp_buffer *buffer = pfile->buffer;
336 while (buffer->cur != buffer->rlimit)
338 prev_space = is_space(c);
341 if (prev_space && c == '_')
343 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
344 strncmp((char *)buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
346 buffer->cur += _ENDASM_LEN;
355 _cpp_process_line_notes (pfile, true);
356 if (buffer->next_line >= buffer->rlimit)
358 _cpp_clean_line (pfile);
360 cols = buffer->next_line - buffer->line_base;
361 CPP_INCREMENT_LINE (pfile, cols);
365 _cpp_process_line_notes (pfile, true);
369 /* Skip a C-style block comment. We find the end of the comment by
370 seeing if an asterisk is before every '/' we encounter. Returns
371 nonzero if comment terminated by EOF, zero otherwise.
373 Buffer->cur points to the initial asterisk of the comment. */
375 _cpp_skip_block_comment (cpp_reader *pfile)
377 cpp_buffer *buffer = pfile->buffer;
378 const uchar *cur = buffer->cur;
387 /* People like decorating comments with '*', so check for '/'
388 instead for efficiency. */
396 /* Warn about potential nested comments, but not if the '/'
397 comes immediately before the true comment delimiter.
398 Don't bother to get it right across escaped newlines. */
399 if (CPP_OPTION (pfile, warn_comments)
400 && cur[0] == '*' && cur[1] != '/')
403 cpp_error_with_line (pfile, CPP_DL_WARNING,
404 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
405 "\"/*\" within comment");
411 buffer->cur = cur - 1;
412 _cpp_process_line_notes (pfile, true);
413 if (buffer->next_line >= buffer->rlimit)
415 _cpp_clean_line (pfile);
417 cols = buffer->next_line - buffer->line_base;
418 CPP_INCREMENT_LINE (pfile, cols);
425 _cpp_process_line_notes (pfile, true);
429 /* Skip a C++ line comment, leaving buffer->cur pointing to the
430 terminating newline. Handles escaped newlines. Returns nonzero
431 if a multiline comment. */
433 skip_line_comment (cpp_reader *pfile)
435 cpp_buffer *buffer = pfile->buffer;
436 unsigned int orig_line = pfile->line_table->highest_line;
438 while (*buffer->cur != '\n')
441 _cpp_process_line_notes (pfile, true);
442 return orig_line != pfile->line_table->highest_line;
445 /* Skips whitespace, saving the next non-whitespace character. */
447 skip_whitespace (cpp_reader *pfile, cppchar_t c)
449 cpp_buffer *buffer = pfile->buffer;
450 bool saw_NUL = false;
454 /* Horizontal space always OK. */
455 if (c == ' ' || c == '\t')
457 /* Just \f \v or \0 left. */
460 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
461 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
462 CPP_BUF_COL (buffer),
463 "%s in preprocessing directive",
464 c == '\f' ? "form feed" : "vertical tab");
468 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
469 while (is_nvspace (c));
472 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
477 /* See if the characters of a number token are valid in a name (no
480 name_p (cpp_reader *pfile, const cpp_string *string)
484 for (i = 0; i < string->len; i++)
485 if (!is_idchar (string->text[i]))
491 /* After parsing an identifier or other sequence, produce a warning about
492 sequences not in NFC/NFKC. */
494 warn_about_normalization (cpp_reader *pfile,
495 const cpp_token *token,
496 const struct normalize_state *s)
498 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
499 && !pfile->state.skipping)
501 /* Make sure that the token is printed using UCNs, even
502 if we'd otherwise happily print UTF-8. */
503 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
506 sz = cpp_spell_token (pfile, token, buf, false) - buf;
507 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
508 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
509 "`%.*s' is not in NFKC", (int) sz, buf);
511 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
512 "`%.*s' is not in NFC", (int) sz, buf);
516 /* Returns TRUE if the sequence starting at buffer->cur is valid in
517 an identifier. FIRST is TRUE if this starts an identifier. */
519 forms_identifier_p (cpp_reader *pfile, int first,
520 struct normalize_state *state)
522 cpp_buffer *buffer = pfile->buffer;
524 if (*buffer->cur == '$')
526 if (!CPP_OPTION (pfile, dollars_in_ident))
530 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
532 CPP_OPTION (pfile, warn_dollars) = 0;
533 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
539 /* Is this a syntactically valid UCN? */
540 if (CPP_OPTION (pfile, extended_identifiers)
541 && *buffer->cur == '\\'
542 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
545 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
554 /* Lex an identifier starting at BUFFER->CUR - 1. */
555 static cpp_hashnode *
556 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
557 struct normalize_state *nst)
559 cpp_hashnode *result;
562 unsigned int hash = HT_HASHSTEP (0, *base);
564 cur = pfile->buffer->cur;
566 while (ISIDNUM (*cur))
568 hash = HT_HASHSTEP (hash, *cur);
571 pfile->buffer->cur = cur;
572 if (starts_ucn || forms_identifier_p (pfile, false, nst))
574 /* Slower version for identifiers containing UCNs (or $). */
576 while (ISIDNUM (*pfile->buffer->cur))
578 pfile->buffer->cur++;
579 NORMALIZE_STATE_UPDATE_IDNUM (nst);
581 } while (forms_identifier_p (pfile, false, nst));
582 result = _cpp_interpret_identifier (pfile, base,
583 pfile->buffer->cur - base);
588 hash = HT_HASHFINISH (hash, len);
590 result = (cpp_hashnode *)
591 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
594 /* Rarely, identifiers require diagnostics when lexed. */
595 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
596 && !pfile->state.skipping, 0))
598 /* It is allowed to poison the same identifier twice. */
599 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
600 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
603 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
604 replacement list of a variadic macro. */
605 if (result == pfile->spec_nodes.n__VA_ARGS__
606 && !pfile->state.va_args_ok)
607 cpp_error (pfile, CPP_DL_PEDWARN,
608 "__VA_ARGS__ can only appear in the expansion"
609 " of a C99 variadic macro");
616 /* Pedantically parse a number, beginning with the character at
617 BUFFER->CUR - 1 (which may be a leading "."), skipping embedded
618 backslash-newlines. Place the result in NUMBER. */
620 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
622 #define get_effective_char(pfile) (*pfile->buffer->cur++)
623 #define BACKUP() (--pfile->buffer->cur)
625 enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
626 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
628 uchar c = *(pfile->buffer->cur - 1);
629 struct obstack *stack = &pfile->hash_table->stack;
638 obstack_1grow (stack, '.');
639 c = get_effective_char(pfile);
647 obstack_1grow (stack, c);
648 c = get_effective_char(pfile);
656 obstack_1grow (stack, c);
657 c = get_effective_char(pfile);
663 obstack_1grow (stack, c);
664 c = get_effective_char(pfile);
675 if (NT_DEC == num_type)
681 obstack_1grow (stack, c);
682 c = get_effective_char(pfile);
689 obstack_1grow (stack, c);
690 c = get_effective_char(pfile);
693 else if ('E' == c || 'e' == c)
695 if (has_whole || has_fract)
699 obstack_1grow (stack, c);
700 c = get_effective_char(pfile);
713 obstack_1grow (stack, c);
714 c = get_effective_char(pfile);
721 obstack_1grow (stack, c);
722 c = get_effective_char(pfile);
725 else if ('P' == c || 'p' == c)
727 if (has_whole || has_fract)
731 obstack_1grow (stack, c);
732 c = get_effective_char(pfile);
739 num_part = NP_INT_SUFFIX;
743 if (NT_DEC == num_type)
749 obstack_1grow (stack, c);
750 c = get_effective_char(pfile);
753 if ('E' == c || 'e' == c)
755 if (has_whole || has_fract)
759 obstack_1grow (stack, c);
760 c = get_effective_char(pfile);
771 obstack_1grow (stack, c);
772 c = get_effective_char(pfile);
775 if ('P' == c || 'p' == c)
777 if (has_whole || has_fract)
781 obstack_1grow (stack, c);
782 c = get_effective_char(pfile);
787 num_part = NP_FLOAT_SUFFIX;
791 if ('+' == c || '-' == c)
794 obstack_1grow (stack, c);
795 c = get_effective_char(pfile);
801 obstack_1grow (stack, c);
802 c = get_effective_char(pfile);
805 num_part = NP_FLOAT_SUFFIX;
809 if ('L' == c || 'l' == c)
814 obstack_1grow (stack, c);
815 c = get_effective_char(pfile);
820 obstack_1grow (stack, c);
821 c = get_effective_char(pfile);
824 else if ('U' == c || 'u' == c)
827 obstack_1grow (stack, c);
828 c = get_effective_char(pfile);
832 case NP_FLOAT_SUFFIX:
833 if ('F' == c || 'f' == c)
836 obstack_1grow (stack, c);
837 c = get_effective_char(pfile);
839 else if ('L' == c || 'l' == c)
842 obstack_1grow (stack, c);
843 c = get_effective_char(pfile);
850 /* Step back over the unwanted char. */
853 number->text = obstack_finish (stack);
857 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
859 lex_number (cpp_reader *pfile, cpp_string *number,
860 struct normalize_state *nst)
866 base = pfile->buffer->cur - 1;
869 cur = pfile->buffer->cur;
871 /* N.B. ISIDNUM does not include $. */
872 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
875 NORMALIZE_STATE_UPDATE_IDNUM (nst);
878 pfile->buffer->cur = cur;
880 while (forms_identifier_p (pfile, false, nst));
882 number->len = cur - base;
883 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
884 memcpy (dest, base, number->len);
885 dest[number->len] = '\0';
889 /* Create a token of type TYPE with a literal spelling. */
891 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
892 unsigned int len, enum cpp_ttype type)
894 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
896 memcpy (dest, base, len);
899 token->val.str.len = len;
900 token->val.str.text = dest;
903 /* Lexes a string, character constant, or angle-bracketed header file
904 name. The stored string contains the spelling, including opening
905 quote and any leading 'L'. TOKEN's type is set to the type of the
906 literal, or CPP_OTHER if it was not properly terminated.
908 The spelling is NUL-terminated, but it is not guaranteed that this
909 is the first NUL since embedded NULs are preserved. */
911 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
913 bool saw_NUL = false;
915 cppchar_t terminator;
920 if (terminator == 'L')
922 if (terminator == '\"')
923 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
924 else if (terminator == '\'')
925 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
927 terminator = '>', type = CPP_HEADER_NAME;
931 cppchar_t c = *cur++;
933 /* In #include-style directives, terminators are not escapable. */
934 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
936 else if (c == terminator)
948 if (saw_NUL && !pfile->state.skipping)
949 cpp_error (pfile, CPP_DL_WARNING,
950 "null character(s) preserved in literal");
952 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
953 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
956 pfile->buffer->cur = cur;
957 create_literal (pfile, token, base, cur - base, type);
960 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
961 comment blocks (when executed with -C option) and
962 _asm (SDCPP specific) blocks */
964 /* Count and copy characters from src to dest, excluding CRs:
965 CRs are automatically generated, because the output is
966 opened in TEXT mode. If dest == NULL, only count chars */
968 copy_text_chars (unsigned char *dest, const unsigned char *src, unsigned int len)
971 const unsigned char *p;
973 for (p = src; p != src + len; ++p)
988 /* SDCC _asm specific */
989 /* The stored _asm block includes the initial '_asm' and any terminator. */
991 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
993 #define _ASM_STR "_asm"
994 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
996 unsigned char *buffer;
997 unsigned int text_len, len;
999 len = pfile->buffer->cur - from;
1000 /* + _ASM_LEN for the initial '_asm'. */
1001 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
1002 buffer = _cpp_unaligned_alloc (pfile, text_len);
1005 token->type = CPP_ASM;
1006 token->val.str.len = text_len;
1007 token->val.str.text = buffer;
1009 memcpy (buffer, _ASM_STR, _ASM_LEN);
1010 copy_text_chars (buffer + _ASM_LEN, from, len);
1013 /* The stored comment includes the comment start and any terminator. */
1015 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1018 unsigned char *buffer;
1019 unsigned int len, clen;
1021 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1023 /* C++ comments probably (not definitely) have moved past a new
1024 line, which we don't want to save in the comment. */
1025 if (is_vspace (pfile->buffer->cur[-1]))
1028 /* If we are currently in a directive, then we need to store all
1029 C++ comments as C comments internally, and so we need to
1030 allocate a little extra space in that case.
1032 Note that the only time we encounter a directive here is
1033 when we are saving comments in a "#define". */
1034 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1036 buffer = _cpp_unaligned_alloc (pfile, clen);
1038 token->type = CPP_COMMENT;
1039 token->val.str.len = clen;
1040 token->val.str.text = buffer;
1043 copy_text_chars (buffer + 1, from, len);
1045 /* Finish conversion to a C comment, if necessary. */
1046 if (pfile->state.in_directive && type == '/')
1049 buffer[clen - 2] = '*';
1050 buffer[clen - 1] = '/';
1054 /* Allocate COUNT tokens for RUN. */
1056 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1058 run->base = XNEWVEC (cpp_token, count);
1059 run->limit = run->base + count;
1063 /* Returns the next tokenrun, or creates one if there is none. */
1065 next_tokenrun (tokenrun *run)
1067 if (run->next == NULL)
1069 run->next = XNEW (tokenrun);
1070 run->next->prev = run;
1071 _cpp_init_tokenrun (run->next, 250);
1077 /* Allocate a single token that is invalidated at the same time as the
1078 rest of the tokens on the line. Has its line and col set to the
1079 same as the last lexed token, so that diagnostics appear in the
1082 _cpp_temp_token (cpp_reader *pfile)
1084 cpp_token *old, *result;
1086 old = pfile->cur_token - 1;
1087 if (pfile->cur_token == pfile->cur_run->limit)
1089 pfile->cur_run = next_tokenrun (pfile->cur_run);
1090 pfile->cur_token = pfile->cur_run->base;
1093 result = pfile->cur_token++;
1094 result->src_loc = old->src_loc;
1098 /* Lex a token into RESULT (external interface). Takes care of issues
1099 like directive handling, token lookahead, multiple include
1100 optimization and skipping. */
1102 _cpp_lex_token (cpp_reader *pfile)
1108 if (pfile->cur_token == pfile->cur_run->limit)
1110 pfile->cur_run = next_tokenrun (pfile->cur_run);
1111 pfile->cur_token = pfile->cur_run->base;
1113 /* We assume that the current token is somewhere in the current
1115 if (pfile->cur_token < pfile->cur_run->base
1116 || pfile->cur_token >= pfile->cur_run->limit)
1119 if (pfile->lookaheads)
1121 pfile->lookaheads--;
1122 result = pfile->cur_token++;
1125 result = _cpp_lex_direct (pfile);
1127 if (result->flags & BOL)
1129 /* Is this a directive? If _cpp_handle_directive returns
1130 false, it is an assembler #. */
1131 if (result->type == CPP_HASH
1132 /* 6.10.3 p 11: Directives in a list of macro arguments
1133 gives undefined behavior. This implementation
1134 handles the directive as normal. */
1135 && pfile->state.parsing_args != 1)
1137 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1139 if (pfile->directive_result.type == CPP_PADDING)
1141 result = &pfile->directive_result;
1144 else if (pfile->state.in_deferred_pragma)
1145 result = &pfile->directive_result;
1147 if (pfile->cb.line_change && !pfile->state.skipping)
1148 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1151 /* We don't skip tokens in directives. */
1152 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1155 /* Outside a directive, invalidate controlling macros. At file
1156 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1157 get here and MI optimization works. */
1158 pfile->mi_valid = false;
1160 if (!pfile->state.skipping || result->type == CPP_EOF)
1167 /* Returns true if a fresh line has been loaded. */
1169 _cpp_get_fresh_line (cpp_reader *pfile)
1173 /* We can't get a new line until we leave the current directive. */
1174 if (pfile->state.in_directive)
1179 cpp_buffer *buffer = pfile->buffer;
1181 if (!buffer->need_line)
1184 if (buffer->next_line < buffer->rlimit)
1186 _cpp_clean_line (pfile);
1190 /* First, get out of parsing arguments state. */
1191 if (pfile->state.parsing_args)
1194 /* End of buffer. Non-empty files should end in a newline. */
1195 if (buffer->buf != buffer->rlimit
1196 && buffer->next_line > buffer->rlimit
1197 && !buffer->from_stage3)
1199 /* Clip to buffer size. */
1200 buffer->next_line = buffer->rlimit;
1203 return_at_eof = buffer->return_at_eof;
1204 _cpp_pop_buffer (pfile);
1205 if (pfile->buffer == NULL || return_at_eof)
1210 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1213 result->type = ELSE_TYPE; \
1214 if (*buffer->cur == CHAR) \
1215 buffer->cur++, result->type = THEN_TYPE; \
1219 /* Lex a token into pfile->cur_token, which is also incremented, to
1220 get diagnostics pointing to the correct location.
1222 Does not handle issues such as token lookahead, multiple-include
1223 optimization, directives, skipping etc. This function is only
1224 suitable for use by _cpp_lex_token, and in special cases like
1225 lex_expansion_token which doesn't care for any of these issues.
1227 When meeting a newline, returns CPP_EOF if parsing a directive,
1228 otherwise returns to the start of the token buffer if permissible.
1229 Returns the location of the lexed token. */
1231 _cpp_lex_direct (cpp_reader *pfile)
1235 const unsigned char *comment_start;
1236 cpp_token *result = pfile->cur_token++;
1240 buffer = pfile->buffer;
1241 if (buffer->need_line)
1243 if (pfile->state.in_deferred_pragma)
1245 result->type = CPP_PRAGMA_EOL;
1246 pfile->state.in_deferred_pragma = false;
1247 if (!pfile->state.pragma_allow_expansion)
1248 pfile->state.prevent_expansion--;
1251 if (!_cpp_get_fresh_line (pfile))
1253 result->type = CPP_EOF;
1254 if (!pfile->state.in_directive)
1256 /* Tell the compiler the line number of the EOF token. */
1257 result->src_loc = pfile->line_table->highest_line;
1258 result->flags = BOL;
1262 if (!pfile->keep_tokens)
1264 pfile->cur_run = &pfile->base_run;
1265 result = pfile->base_run.base;
1266 pfile->cur_token = result + 1;
1268 result->flags = BOL;
1269 if (pfile->state.parsing_args == 2)
1270 result->flags |= PREV_WHITE;
1272 buffer = pfile->buffer;
1274 result->src_loc = pfile->line_table->highest_line;
1277 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1278 && !pfile->overlaid_buffer)
1280 _cpp_process_line_notes (pfile, false);
1281 result->src_loc = pfile->line_table->highest_line;
1285 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1286 CPP_BUF_COLUMN (buffer, buffer->cur));
1290 case ' ': case '\t': case '\f': case '\v': case '\0':
1291 result->flags |= PREV_WHITE;
1292 skip_whitespace (pfile, c);
1296 if (buffer->cur < buffer->rlimit)
1297 CPP_INCREMENT_LINE (pfile, 0);
1298 buffer->need_line = true;
1301 case '0': case '1': case '2': case '3': case '4':
1302 case '5': case '6': case '7': case '8': case '9':
1304 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1305 result->type = CPP_NUMBER;
1306 if (CPP_OPTION(pfile, pedantic_parse_number))
1307 pedantic_lex_number (pfile, &result->val.str);
1309 lex_number (pfile, &result->val.str, &nst);
1310 warn_about_normalization (pfile, result, &nst);
1315 /* 'L' may introduce wide characters or strings. */
1316 if (*buffer->cur == '\'' || *buffer->cur == '"')
1318 lex_string (pfile, result, buffer->cur - 1);
1324 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1325 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1326 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1327 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1329 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1330 case 'G': case 'H': case 'I': case 'J': case 'K':
1331 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1332 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1334 result->type = CPP_NAME;
1336 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1337 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1339 warn_about_normalization (pfile, result, &nst);
1342 /* SDCC _asm specific */
1343 /* handle _asm ... _endasm ; */
1344 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1346 comment_start = buffer->cur;
1347 result->type = CPP_ASM;
1348 skip_asm_block (pfile);
1349 /* Save the _asm block as a token in its own right. */
1350 save_asm (pfile, result, comment_start);
1352 /* Convert named operators to their proper types. */
1353 else if (result->val.node->flags & NODE_OPERATOR)
1355 result->flags |= NAMED_OP;
1356 result->type = (enum cpp_ttype) result->val.node->directive_index;
1362 lex_string (pfile, result, buffer->cur - 1);
1366 /* A potential block or line comment. */
1367 comment_start = buffer->cur;
1372 if (_cpp_skip_block_comment (pfile))
1373 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1375 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1376 || cpp_in_system_header (pfile)))
1378 /* Warn about comments only if pedantically GNUC89, and not
1379 in system headers. */
1380 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1381 && ! buffer->warned_cplusplus_comments)
1383 cpp_error (pfile, CPP_DL_PEDWARN,
1384 "C++ style comments are not allowed in ISO C90");
1385 cpp_error (pfile, CPP_DL_PEDWARN,
1386 "(this will be reported only once per input file)");
1387 buffer->warned_cplusplus_comments = 1;
1390 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1391 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1396 result->type = CPP_DIV_EQ;
1401 result->type = CPP_DIV;
1405 if (!pfile->state.save_comments)
1407 result->flags |= PREV_WHITE;
1408 goto update_tokens_line;
1411 /* Save the comment as a token in its own right. */
1412 save_comment (pfile, result, comment_start, c);
1416 if (pfile->state.angled_headers)
1418 lex_string (pfile, result, buffer->cur - 1);
1422 result->type = CPP_LESS;
1423 if (*buffer->cur == '=')
1424 buffer->cur++, result->type = CPP_LESS_EQ;
1425 else if (*buffer->cur == '<')
1428 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1430 else if (CPP_OPTION (pfile, digraphs))
1432 if (*buffer->cur == ':')
1435 result->flags |= DIGRAPH;
1436 result->type = CPP_OPEN_SQUARE;
1438 else if (*buffer->cur == '%')
1441 result->flags |= DIGRAPH;
1442 result->type = CPP_OPEN_BRACE;
1448 result->type = CPP_GREATER;
1449 if (*buffer->cur == '=')
1450 buffer->cur++, result->type = CPP_GREATER_EQ;
1451 else if (*buffer->cur == '>')
1454 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1459 result->type = CPP_MOD;
1460 if (*buffer->cur == '=')
1461 buffer->cur++, result->type = CPP_MOD_EQ;
1462 else if (CPP_OPTION (pfile, digraphs))
1464 if (*buffer->cur == ':')
1467 result->flags |= DIGRAPH;
1468 result->type = CPP_HASH;
1469 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1470 buffer->cur += 2, result->type = CPP_PASTE;
1472 else if (*buffer->cur == '>')
1475 result->flags |= DIGRAPH;
1476 result->type = CPP_CLOSE_BRACE;
1482 result->type = CPP_DOT;
1483 if (ISDIGIT (*buffer->cur))
1485 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1486 result->type = CPP_NUMBER;
1487 if (CPP_OPTION(pfile, pedantic_parse_number))
1488 pedantic_lex_number (pfile, &result->val.str);
1490 lex_number (pfile, &result->val.str, &nst);
1491 warn_about_normalization (pfile, result, &nst);
1493 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1494 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1495 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1496 buffer->cur++, result->type = CPP_DOT_STAR;
1500 result->type = CPP_PLUS;
1501 if (*buffer->cur == '+')
1502 buffer->cur++, result->type = CPP_PLUS_PLUS;
1503 else if (*buffer->cur == '=')
1504 buffer->cur++, result->type = CPP_PLUS_EQ;
1508 result->type = CPP_MINUS;
1509 if (*buffer->cur == '>')
1512 result->type = CPP_DEREF;
1513 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1514 buffer->cur++, result->type = CPP_DEREF_STAR;
1516 else if (*buffer->cur == '-')
1517 buffer->cur++, result->type = CPP_MINUS_MINUS;
1518 else if (*buffer->cur == '=')
1519 buffer->cur++, result->type = CPP_MINUS_EQ;
1523 result->type = CPP_AND;
1524 if (*buffer->cur == '&')
1525 buffer->cur++, result->type = CPP_AND_AND;
1526 else if (*buffer->cur == '=')
1527 buffer->cur++, result->type = CPP_AND_EQ;
1531 result->type = CPP_OR;
1532 if (*buffer->cur == '|')
1533 buffer->cur++, result->type = CPP_OR_OR;
1534 else if (*buffer->cur == '=')
1535 buffer->cur++, result->type = CPP_OR_EQ;
1539 result->type = CPP_COLON;
1540 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1541 buffer->cur++, result->type = CPP_SCOPE;
1542 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1545 result->flags |= DIGRAPH;
1546 result->type = CPP_CLOSE_SQUARE;
1550 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1551 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1552 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1553 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1554 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1556 case '?': result->type = CPP_QUERY; break;
1557 case '~': result->type = CPP_COMPL; break;
1558 case ',': result->type = CPP_COMMA; break;
1559 case '(': result->type = CPP_OPEN_PAREN; break;
1560 case ')': result->type = CPP_CLOSE_PAREN; break;
1561 case '[': result->type = CPP_OPEN_SQUARE; break;
1562 case ']': result->type = CPP_CLOSE_SQUARE; break;
1563 case '{': result->type = CPP_OPEN_BRACE; break;
1564 case '}': result->type = CPP_CLOSE_BRACE; break;
1565 case ';': result->type = CPP_SEMICOLON; break;
1567 /* @ is a punctuator in Objective-C. */
1568 case '@': result->type = CPP_ATSIGN; break;
1573 const uchar *base = --buffer->cur;
1574 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1576 if (forms_identifier_p (pfile, true, &nst))
1578 result->type = CPP_NAME;
1579 result->val.node = lex_identifier (pfile, base, true, &nst);
1580 warn_about_normalization (pfile, result, &nst);
1587 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1594 /* An upper bound on the number of bytes needed to spell TOKEN.
1595 Does not include preceding whitespace. */
/* The bound depends only on the spelling category that TOKEN_SPELL
   looks up in token_spellings for the token type.  */
1597 cpp_token_len (const cpp_token *token)
1601 switch (TOKEN_SPELL (token))
/* Every category without its own case is budgeted four bytes.
   NOTE(review): cpp_spell_token tests NAMED_OP inside its
   SPELL_OPERATOR case; if such tokens are spelt from an identifier
   node longer than four bytes this bound is too small -- confirm.  */
1603 default: len = 4; break;
/* Literals are spelt by copying val.str verbatim, so the stored
   length is exact.  */
1604 case SPELL_LITERAL: len = token->val.str.len; break;
/* Presumably ten bytes per identifier byte because utf8_to_ucn always
   writes ten bytes for a non-ASCII sequence -- TODO confirm.  */
1605 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1611 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
1612 Return the number of bytes read out of NAME. (There are always
1613 10 bytes written to BUFFER.) */
1616 utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
1622 unsigned long utf32;
1624 /* Compute the length of the UTF-8 sequence. */
/* T is shifted left once per pass, so the test against 0x80 sees each
   successive high bit of the lead byte; the loop stops at the first
   zero bit.  */
1625 for (t = *name; t & 0x80; t <<= 1)
/* Strip the length-marker bits from the lead byte, keeping only its
   payload bits.  */
1628 utf32 = *name & (0x7F >> ucn_len);
/* Fold in the remaining bytes of the sequence; the lead byte counts
   as the first, hence the start at 1.  */
1629 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
/* Each continuation byte supplies its low six bits.  */
1631 utf32 = (utf32 << 6) | (*++name & 0x3F);
1633 /* Ill-formed UTF-8. */
/* A continuation byte must have 10 as its top two bits.  */
1634 if ((*name & ~0x3F) != 0x80)
/* Emit the code point as eight lowercase hexadecimal digits, most
   significant nibble first.  */
1640 for (j = 7; j >= 0; j--)
1641 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
1646 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1647 already contain enough space to hold the token's spelling.
1648 Returns a pointer to the character after the last character written.
1649 FORSTRING is true if this is to be the spelling after translation
1650 phase 1 (this is different for UCNs).
1651 FIXME: Would be nice if we didn't need the PFILE argument. */
1653 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1654 unsigned char *buffer, bool forstring)
/* Dispatch on the spelling category recorded for the token type.  */
1656 switch (TOKEN_SPELL (token))
1658 case SPELL_OPERATOR:
1660 const unsigned char *spelling;
/* A digraph is spelt from digraph_spellings, indexed by the distance
   of the token type from CPP_FIRST_DIGRAPH.  */
1663 if (token->flags & DIGRAPH)
1665 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1666 else if (token->flags & NAMED_OP)
/* TOKEN_NAME is the spelling stored in token_spellings.  */
1669 spelling = TOKEN_NAME (token);
/* Walk the spelling up to, but not including, its terminating NUL.  */
1671 while ((c = *spelling++) != '\0')
/* Identifier spelt by copying the bytes of its node unchanged;
   presumably the FORSTRING case -- TODO confirm.  */
1680 memcpy (buffer, NODE_NAME (token->val.node),
1681 NODE_LEN (token->val.node));
1682 buffer += NODE_LEN (token->val.node);
/* Identifier spelt byte by byte so that non-ASCII sequences can be
   rewritten as UCN escapes.  */
1687 const unsigned char * name = NODE_NAME (token->val.node);
1689 for (i = 0; i < NODE_LEN (token->val.node); i++)
/* A byte with its top bit set is not ASCII and starts a UTF-8
   sequence.  */
1690 if (name[i] & ~0x7F)
/* utf8_to_ucn returns the bytes consumed; one is subtracted because
   the loop increment accounts for the first of them.  */
1692 i += utf8_to_ucn (buffer, name + i) - 1;
1696 *buffer++ = NODE_NAME (token->val.node)[i];
/* Literals are copied verbatim from the stored text.  */
1701 memcpy (buffer, token->val.str.text, token->val.str.len);
1702 buffer += token->val.str.len;
/* A token that reaches this point cannot be spelt; it is reported at
   CPP_DL_ICE severity.  */
1706 cpp_error (pfile, CPP_DL_ICE,
1707 "unspellable token %s", TOKEN_NAME (token));
1714 /* Returns TOKEN spelt as a null-terminated string. The string is
1715 freed when the reader is destroyed. Useful for diagnostics. */
1717 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
/* One byte more than the spelling bound, presumably for the
   terminating NUL.  */
1719 unsigned int len = cpp_token_len (token) + 1;
/* Storage comes from _cpp_unaligned_alloc, i.e. the u_buff chain of
   PFILE.  */
1720 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* Spell with FORSTRING false; END is the position just past the last
   byte written.  */
1722 end = cpp_spell_token (pfile, token, start, false);
1728 /* Used by C front ends, which really should move to using
1729 cpp_token_as_text. */
/* Returns the name stored for TYPE in token_spellings, viewed as a
   plain char string.  TYPE indexes the table directly; no range check
   is made here.  */
1731 cpp_type2name (enum cpp_ttype type)
1733 return (const char *) token_spellings[type].name;
1736 /* Writes the spelling of token to FP, without any preceding space.
1737 Separated from cpp_spell_token for efficiency - to avoid stdio
1738 double-buffering. */
/* NOTE(review): the results of the fwrite and fputc calls below are
   not checked.  */
1740 cpp_output_token (const cpp_token *token, FILE *fp)
1742 switch (TOKEN_SPELL (token))
1744 case SPELL_OPERATOR:
1746 const unsigned char *spelling;
/* Same sources of spelling as in cpp_spell_token: the digraph table
   for DIGRAPH tokens, TOKEN_NAME for the table spelling.  */
1749 if (token->flags & DIGRAPH)
1751 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1752 else if (token->flags & NAMED_OP)
1755 spelling = TOKEN_NAME (token);
/* The pointer is advanced before the next character is read and
   tested against the terminating NUL.  */
1760 while ((c = *++spelling) != '\0');
1768 const unsigned char * name = NODE_NAME (token->val.node);
1770 for (i = 0; i < NODE_LEN (token->val.node); i++)
/* A byte with its top bit set starts a UTF-8 sequence, which is
   written as a UCN escape instead of raw bytes.  */
1771 if (name[i] & ~0x7F)
/* Ten bytes: the amount utf8_to_ucn always writes.  */
1773 unsigned char buffer[10];
/* Skip the bytes consumed, less the one the loop increment covers.  */
1774 i += utf8_to_ucn (buffer, name + i) - 1;
1775 fwrite (buffer, 1, 10, fp);
/* Any other byte is written through unchanged.  */
1778 fputc (NODE_NAME (token->val.node)[i], fp);
/* Literals are written verbatim from the stored text.  */
1783 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1787 /* An error, most probably. */
1792 /* Compare two tokens. */
/* Type and flags must both match before the token values are
   compared; the kind of value comparison depends on the spelling
   category of A.  */
1794 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1796 if (a->type == b->type && a->flags == b->flags)
1797 switch (TOKEN_SPELL (a))
1799 default: /* Keep compiler happy. */
1800 case SPELL_OPERATOR:
/* Macro arguments additionally need the same argument number; any
   other type passes this test on type alone.  */
1803 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Node-valued tokens are equal when they share the same node
   pointer.  */
1805 return a->val.node == b->val.node;
/* String-valued tokens are equal when lengths and bytes agree.  */
1807 return (a->val.str.len == b->val.str.len
1808 && !memcmp (a->val.str.text, b->val.str.text,
1815 /* Returns nonzero if a space should be inserted to avoid an
1816 accidental token paste for output. For simplicity, it is
1817 conservative, and occasionally advises a space where one is not
1818 needed, e.g. "." and ".2". */
1820 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1821 const cpp_token *token2)
/* A and B start out as the two token types; C (set below) is the
   first character of the spelling of TOKEN2 where one is available.  */
1823 enum cpp_ttype a = token1->type, b = token2->type;
/* NOTE(review): named operators on either side are special-cased
   first, presumably by treating them like identifiers -- confirm.  */
1826 if (token1->flags & NAMED_OP)
1828 if (token2->flags & NAMED_OP)
/* Digraphs take their first character from digraph_spellings, other
   operators from token_spellings.  */
1832 if (token2->flags & DIGRAPH)
1833 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1834 else if (token_spellings[b].category == SPELL_OPERATOR)
1835 c = token_spellings[b].name[0];
1837 /* Quickly get everything that can paste with an '='. */
/* NOTE(review): this relies on every token type up to CPP_LAST_EQ
   having a longer form ending in = -- confirm against the token
   table.  */
1838 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per left-hand token type: would C, or a token of type B, extend
   TOKEN1 into a different token?  */
1843 case CPP_GREATER: return c == '>';
1844 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1845 case CPP_PLUS: return c == '+';
1846 case CPP_MINUS: return c == '-' || c == '>';
1847 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1848 case CPP_MOD: return c == ':' || c == '>';
1849 case CPP_AND: return c == '&';
1850 case CPP_OR: return c == '|';
1851 case CPP_COLON: return c == ':' || c == '>';
1852 case CPP_DEREF: return c == '*';
1853 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1854 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* After a name: a number for which name_p returns nonzero
   (presumably one that would lex as an identifier -- confirm), or a
   character or string constant, which the name could prefix.  */
1855 case CPP_NAME: return ((b == CPP_NUMBER
1856 && name_p (pfile, &token2->val.str))
1858 || b == CPP_CHAR || b == CPP_STRING); /* L */
/* After a number: another number, a name, a period or a sign.  */
1859 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1860 || c == '.' || c == '+' || c == '-');
/* For CPP_OTHER only the first stored character is examined: a
   backslash, or an at-sign when the objc option is on.  */
1862 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1864 || (CPP_OPTION (pfile, objc)
1865 && token1->val.str.text[0] == '@'
1866 && (b == CPP_NAME || b == CPP_STRING)));
1873 /* Output all the remaining tokens on the current line, and a newline
1874 character, to FP. Leading whitespace is removed. If there are
1875 macros, special token padding is not performed. */
1877 cpp_output_line (cpp_reader *pfile, FILE *fp)
1879 const cpp_token *token;
/* Prime the loop with the first token; CPP_EOF ends it.  */
1881 token = cpp_get_token (pfile);
1882 while (token->type != CPP_EOF)
1884 cpp_output_token (token, fp);
/* The next token is fetched before the whitespace test because
   PREV_WHITE is carried by the token that follows the gap.  Testing
   it only here also means the flag on the very first token is never
   examined, which is how leading whitespace gets dropped.  */
1885 token = cpp_get_token (pfile);
1886 if (token->flags & PREV_WHITE)
1893 /* Memory buffers. Changing these three constants can have a dramatic
1894 effect on performance. The values here are reasonable defaults,
1895 but might be tuned. If you adjust them, be sure to test across a
1896 range of uses of cpplib, including heavy nested function-like macro
1897 expansion. Also check the change in peak memory usage (NJAMD is a
1898 good tool for this). */
/* Smallest data area of a buffer; new_buff raises shorter requests
   to this.  */
1899 #define MIN_BUFF_SIZE 8000
/* Largest free buffer that _cpp_get_buff will reuse for a request of
   MIN_SIZE bytes.  */
1900 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the bytes
   between cur and limit.  NOTE(review): MIN_EXTRA is expanded without
   parentheses, unlike BUFF, so callers should pass a simple
   expression.  */
1901 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1902 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time check that the upper bound for an empty request is not
   below the minimum buffer size.  */
1904 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1905 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1908 /* Create a new allocation buffer. Place the control block at the end
1909 of the buffer, so that buffer overflows will cause immediate chaos. */
1911 new_buff (size_t len)
1914 unsigned char *base;
/* Never allocate fewer than MIN_BUFF_SIZE data bytes.  */
1916 if (len < MIN_BUFF_SIZE)
1917 len = MIN_BUFF_SIZE;
/* Round LEN with CPP_ALIGN, presumably so that the control block
   placed at base + len is suitably aligned -- TODO confirm.  */
1918 len = CPP_ALIGN (len);
/* One allocation holds the LEN data bytes followed by the control
   block.  */
1920 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1921 result = (_cpp_buff *) (base + len);
1922 result->base = base;
/* LIMIT, the end of the data area, is also the address of the
   control block itself.  */
1924 result->limit = base + len;
1925 result->next = NULL;
1929 /* Place a chain of unwanted allocation buffers on the free list. */
1931 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
/* END is meant to reach the last buffer of the chain starting at
   BUFF before the splice below.  */
1933 _cpp_buff *end = buff;
/* Splice the whole chain onto the front of the free list: its tail
   points at the old list head and BUFF becomes the new head.  */
1937 end->next = pfile->free_buffs;
1938 pfile->free_buffs = buff;
1941 /* Return a free buffer of size at least MIN_SIZE. */
/* The free list of PFILE is walked through a pointer to each link,
   presumably so that a chosen buffer can be unlinked in place.
   new_buff supplies a fresh buffer, presumably once the list is
   exhausted -- TODO confirm.  A reused buffer must hold at least
   MIN_SIZE bytes but no more than BUFF_SIZE_UPPER_BOUND (min_size);
   its next link is cleared and its cursor is reset to the start of
   its data area before it is handed out.  */
1943 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1945 _cpp_buff *result, **p;
1947 for (p = &pfile->free_buffs;; p = &(*p)->next)
1952 return new_buff (min_size);
1954 size = result->limit - result->base;
1955 /* Return a buffer that's big enough, but don't waste one that's
1957 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1962 result->next = NULL;
1963 result->cur = result->base;
1967 /* Creates a new buffer with enough space to hold the uncommitted
1968 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1969 the excess bytes to the new buffer. Chains the new buffer after
1970 BUFF, and returns the new buffer. */
1972 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
/* MIN_EXTRA plus twice the bytes between cur and limit of BUFF.  */
1974 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
/* Note that this local has the same name as the file-level new_buff
   function and hides it inside this body.  */
1975 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1977 buff->next = new_buff;
/* Carry the uncommitted tail of BUFF over to the start of the new
   buffer; BUFF_ROOM presumably gives its length in bytes.  */
1978 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1982 /* Creates a new buffer with enough space to hold the uncommitted
1983 remaining bytes of the buffer pointed to by BUFF, and at least
1984 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1985 Chains the new buffer before the buffer pointed to by BUFF, and
1986 updates the pointer to point to the new buffer. */
1988 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
/* The local new_buff hides the file-level function of the same name
   inside this body.  */
1990 _cpp_buff *new_buff, *old_buff = *pbuff;
/* MIN_EXTRA plus twice the bytes between cur and limit of the old
   buffer.  */
1991 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1993 new_buff = _cpp_get_buff (pfile, size);
/* Carry the uncommitted tail of the old buffer over to the start of
   the new one; BUFF_ROOM presumably gives its length in bytes.  */
1994 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
/* The old buffer now follows the new one in the chain.  */
1995 new_buff->next = old_buff;
1999 /* Free a chain of buffers starting at BUFF. */
2001 _cpp_free_buff (_cpp_buff *buff)
/* NEXT, not buff->next, drives the iteration: new_buff places the
   control block inside the allocation it describes, so the link is
   expected to be saved before a buffer is released.  */
2005 for (; buff; buff = next)
2012 /* Allocate permanent, unaligned storage of length LEN. */
2014 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
/* Allocation is a cursor bump within the current u_buff buffer.  */
2016 _cpp_buff *buff = pfile->u_buff;
2017 unsigned char *result = buff->cur;
/* Not enough room left: obtain a buffer of at least LEN bytes and
   push it on the front of the u_buff chain.  */
2019 if (len > (size_t) (buff->limit - result))
2021 buff = _cpp_get_buff (pfile, len);
2022 buff->next = pfile->u_buff;
2023 pfile->u_buff = buff;
/* Commit the allocation by advancing the cursor past it.  */
2027 buff->cur = result + len;
2031 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2032 That buffer is used for growing allocations when saving macro
2033 replacement lists in a #define, and when parsing an answer to an
2034 assertion in #assert, #unassert or #if (and therefore possibly
2035 whilst expanding macros). It therefore must not be used by any
2036 code that they might call: specifically the lexer and the guts of
the macro expander.
2039 All existing other uses clearly fit this restriction: storing
2040 registered pragmas during initialization. */
/* NOTE(review): the word unaligned above is at odds with the function
   name and looks carried over from _cpp_unaligned_alloc; nothing in
   this body rounds LEN or the cursor -- confirm the alignment
   contract with callers.  */
2042 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
/* Allocation is a cursor bump within the current a_buff buffer.  */
2044 _cpp_buff *buff = pfile->a_buff;
2045 unsigned char *result = buff->cur;
/* Not enough room left: obtain a buffer of at least LEN bytes and
   push it on the front of the a_buff chain.  */
2047 if (len > (size_t) (buff->limit - result))
2049 buff = _cpp_get_buff (pfile, len);
2050 buff->next = pfile->a_buff;
2051 pfile->a_buff = buff;
/* Commit the allocation by advancing the cursor past it.  */
2055 buff->cur = result + len;
2059 /* Say which field of TOK is in use. */
/* The answer follows the spelling category of the token type, except
   that a few token types select a field by type.  */
2061 enum cpp_token_fld_kind
2062 cpp_token_val_index (cpp_token *tok)
2064 switch (TOKEN_SPELL (tok))
2067 return CPP_TOKEN_FLD_NODE;
2069 return CPP_TOKEN_FLD_STR;
/* Token types whose value field is chosen by type: macro arguments,
   padding and pragmas.  */
2071 if (tok->type == CPP_MACRO_ARG)
2072 return CPP_TOKEN_FLD_ARG_NO;
2073 else if (tok->type == CPP_PADDING)
2074 return CPP_TOKEN_FLD_SOURCE;
2075 else if (tok->type == CPP_PRAGMA)
2076 return CPP_TOKEN_FLD_PRAGMA;
2077 /* else fall through */
2079 return CPP_TOKEN_FLD_NONE;