1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
38 enum spell_type category;
39 const unsigned char *name;
42 static const unsigned char *const digraph_spellings[] =
43 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, U s },
46 #define TK(e, s) { SPELL_ ## s, U #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
101 _cpp_clean_line (cpp_reader *pfile)
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 /* Short circuit for the common case of an un-escaped line with
116 no trigraphs. The primary win here is by not writing any
117 data back to memory until we have to. */
121 if (c == '\n' || c == '\r')
125 if (s == buffer->rlimit)
128 /* DOS line ending? */
129 if (c == '\r' && s[1] == '\n')
132 if (s == buffer->rlimit)
135 /* check for escaped newline */
137 while (p != buffer->next_line && is_nvspace (p[-1]))
139 if (p == buffer->next_line || p[-1] != '\\')
142 /* Have an escaped newline; process it and proceed to
144 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
146 buffer->next_line = p - 1;
149 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
151 /* Have a trigraph. We may or may not have to convert
152 it. Add a line note regardless, for -Wtrigraphs. */
153 add_line_note (buffer, s, s[2]);
154 if (CPP_OPTION (pfile, trigraphs))
156 /* We do, and that means we have to switch to the
159 *d = _cpp_trigraph_map[s[2]];
172 if (c == '\n' || c == '\r')
174 /* Handle DOS line endings. */
175 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
177 if (s == buffer->rlimit)
182 while (p != buffer->next_line && is_nvspace (p[-1]))
184 if (p == buffer->next_line || p[-1] != '\\')
187 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
189 buffer->next_line = p - 1;
191 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
193 /* Add a note regardless, for the benefit of -Wtrigraphs. */
194 add_line_note (buffer, d, s[2]);
195 if (CPP_OPTION (pfile, trigraphs))
197 *d = _cpp_trigraph_map[s[2]];
207 while (*s != '\n' && *s != '\r');
210 /* Handle DOS line endings. */
211 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
217 /* A sentinel note that should never be processed. */
218 add_line_note (buffer, d + 1, '\n');
219 buffer->next_line = s + 1;
222 /* Return true if the trigraph indicated by NOTE should be warned
223 about in a comment. */
225 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 /* Within comments we don't warn about trigraphs, unless the
230 trigraph forms an escaped newline, as that may change
232 if (note->type != '/')
235 /* If -trigraphs, then this was an escaped newline iff the next note
237 if (CPP_OPTION (pfile, trigraphs))
238 return note[1].pos == note->pos;
240 /* Otherwise, see if this forms an escaped newline. */
242 while (is_nvspace (*p))
245 /* There might have been escaped newlines between the trigraph and the
246 newline we found. Hence the position test. */
247 return (*p == '\n' && p < note[1].pos);
250 /* Process the notes created by add_line_note as far as the current
253 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
255 cpp_buffer *buffer = pfile->buffer;
259 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 if (note->pos > buffer->cur)
266 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
268 if (note->type == '\\' || note->type == ' ')
270 if (note->type == ' ' && !in_comment)
271 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
272 "backslash and newline separated by space");
274 if (buffer->next_line > buffer->rlimit)
276 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
277 "backslash-newline at end of file");
278 /* Prevent "no newline at end of file" warning. */
279 buffer->next_line = buffer->rlimit;
282 buffer->line_base = note->pos;
283 CPP_INCREMENT_LINE (pfile, 0);
285 else if (_cpp_trigraph_map[note->type])
287 if (CPP_OPTION (pfile, warn_trigraphs)
288 && (!in_comment || warn_in_comment (pfile, note)))
290 if (CPP_OPTION (pfile, trigraphs))
291 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
292 "trigraph ??%c converted to %c",
294 (int) _cpp_trigraph_map[note->type]);
298 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
299 "trigraph ??%c ignored, use -trigraphs to enable",
309 /* SDCC _asm specific */
310 /* Skip an _asm ... _endasm block. We find the end of the block by
311 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
314 skip_asm_block (cpp_reader *pfile)
316 #define _ENDASM_STR "endasm"
317 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
319 cpp_buffer *buffer = pfile->buffer;
324 while (buffer->cur != buffer->rlimit)
326 prev_space = is_space(c);
329 if (prev_space && c == '_')
331 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
332 strncmp((char *)buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
334 buffer->cur += _ENDASM_LEN;
343 _cpp_process_line_notes (pfile, true);
344 if (buffer->next_line >= buffer->rlimit)
346 _cpp_clean_line (pfile);
348 cols = buffer->next_line - buffer->line_base;
349 CPP_INCREMENT_LINE (pfile, cols);
353 _cpp_process_line_notes (pfile, true);
357 /* Skip a C-style block comment. We find the end of the comment by
358 seeing if an asterisk is before every '/' we encounter. Returns
359 nonzero if comment terminated by EOF, zero otherwise.
361 Buffer->cur points to the initial asterisk of the comment. */
363 _cpp_skip_block_comment (cpp_reader *pfile)
365 cpp_buffer *buffer = pfile->buffer;
366 const uchar *cur = buffer->cur;
375 /* People like decorating comments with '*', so check for '/'
376 instead for efficiency. */
384 /* Warn about potential nested comments, but not if the '/'
385 comes immediately before the true comment delimiter.
386 Don't bother to get it right across escaped newlines. */
387 if (CPP_OPTION (pfile, warn_comments)
388 && cur[0] == '*' && cur[1] != '/')
391 cpp_error_with_line (pfile, CPP_DL_WARNING,
392 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
393 "\"/*\" within comment");
399 buffer->cur = cur - 1;
400 _cpp_process_line_notes (pfile, true);
401 if (buffer->next_line >= buffer->rlimit)
403 _cpp_clean_line (pfile);
405 cols = buffer->next_line - buffer->line_base;
406 CPP_INCREMENT_LINE (pfile, cols);
413 _cpp_process_line_notes (pfile, true);
417 /* Skip a C++ line comment, leaving buffer->cur pointing to the
418 terminating newline. Handles escaped newlines. Returns nonzero
419 if a multiline comment. */
421 skip_line_comment (cpp_reader *pfile)
423 cpp_buffer *buffer = pfile->buffer;
424 unsigned int orig_line = pfile->line_table->highest_line;
426 while (*buffer->cur != '\n')
429 _cpp_process_line_notes (pfile, true);
430 return orig_line != pfile->line_table->highest_line;
433 /* Skips whitespace, saving the next non-whitespace character. */
435 skip_whitespace (cpp_reader *pfile, cppchar_t c)
437 cpp_buffer *buffer = pfile->buffer;
438 bool saw_NUL = false;
442 /* Horizontal space always OK. */
443 if (c == ' ' || c == '\t')
445 /* Just \f \v or \0 left. */
448 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
449 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
450 CPP_BUF_COL (buffer),
451 "%s in preprocessing directive",
452 c == '\f' ? "form feed" : "vertical tab");
456 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
457 while (is_nvspace (c));
460 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
465 /* See if the characters of a number token are valid in a name (no
468 name_p (cpp_reader *pfile, const cpp_string *string)
472 for (i = 0; i < string->len; i++)
473 if (!is_idchar (string->text[i]))
479 /* After parsing an identifier or other sequence, produce a warning about
480 sequences not in NFC/NFKC. */
482 warn_about_normalization (cpp_reader *pfile,
483 const cpp_token *token,
484 const struct normalize_state *s)
486 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
487 && !pfile->state.skipping)
489 /* Make sure that the token is printed using UCNs, even
490 if we'd otherwise happily print UTF-8. */
491 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
494 sz = cpp_spell_token (pfile, token, buf, false) - buf;
495 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
496 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
497 "`%.*s' is not in NFKC", (int) sz, buf);
499 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
500 "`%.*s' is not in NFC", (int) sz, buf);
504 /* Returns TRUE if the sequence starting at buffer->cur is valid in
505 an identifier. FIRST is TRUE if this starts an identifier. */
507 forms_identifier_p (cpp_reader *pfile, int first,
508 struct normalize_state *state)
510 cpp_buffer *buffer = pfile->buffer;
512 if (*buffer->cur == '$')
514 if (!CPP_OPTION (pfile, dollars_in_ident))
518 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
520 CPP_OPTION (pfile, warn_dollars) = 0;
521 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
527 /* Is this a syntactically valid UCN? */
528 if (CPP_OPTION (pfile, extended_identifiers)
529 && *buffer->cur == '\\'
530 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
533 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
542 /* Lex an identifier starting at BUFFER->CUR - 1. */
543 static cpp_hashnode *
544 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
545 struct normalize_state *nst)
547 cpp_hashnode *result;
550 unsigned int hash = HT_HASHSTEP (0, *base);
552 cur = pfile->buffer->cur;
554 while (ISIDNUM (*cur))
556 hash = HT_HASHSTEP (hash, *cur);
559 pfile->buffer->cur = cur;
560 if (starts_ucn || forms_identifier_p (pfile, false, nst))
562 /* Slower version for identifiers containing UCNs (or $). */
564 while (ISIDNUM (*pfile->buffer->cur))
566 pfile->buffer->cur++;
567 NORMALIZE_STATE_UPDATE_IDNUM (nst);
569 } while (forms_identifier_p (pfile, false, nst));
570 result = _cpp_interpret_identifier (pfile, base,
571 pfile->buffer->cur - base);
576 hash = HT_HASHFINISH (hash, len);
578 result = (cpp_hashnode *)
579 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
582 /* Rarely, identifiers require diagnostics when lexed. */
583 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
584 && !pfile->state.skipping, 0))
586 /* It is allowed to poison the same identifier twice. */
587 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
588 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
591 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
592 replacement list of a variadic macro. */
593 if (result == pfile->spec_nodes.n__VA_ARGS__
594 && !pfile->state.va_args_ok)
595 cpp_error (pfile, CPP_DL_PEDWARN,
596 "__VA_ARGS__ can only appear in the expansion"
597 " of a C99 variadic macro");
604 /* Pedantically parse a number whose first character is at
605 BUFFER->CUR - 1; that character is a digit, or a "." followed by a
606 digit. Place the result in NUMBER. */
608 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
610 #define get_effective_char(pfile) (*pfile->buffer->cur++)
611 #define BACKUP() (--pfile->buffer->cur)
613 enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
614 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
616 uchar c = *(pfile->buffer->cur - 1);
617 struct obstack *stack = &pfile->hash_table->stack;
626 obstack_1grow (stack, '.');
627 c = get_effective_char(pfile);
635 obstack_1grow (stack, c);
636 c = get_effective_char(pfile);
644 obstack_1grow (stack, c);
645 c = get_effective_char(pfile);
651 obstack_1grow (stack, c);
652 c = get_effective_char(pfile);
663 if (NT_DEC == num_type)
669 obstack_1grow (stack, c);
670 c = get_effective_char(pfile);
677 obstack_1grow (stack, c);
678 c = get_effective_char(pfile);
681 else if ('E' == c || 'e' == c)
683 if (has_whole || has_fract)
687 obstack_1grow (stack, c);
688 c = get_effective_char(pfile);
701 obstack_1grow (stack, c);
702 c = get_effective_char(pfile);
709 obstack_1grow (stack, c);
710 c = get_effective_char(pfile);
713 else if ('P' == c || 'p' == c)
715 if (has_whole || has_fract)
719 obstack_1grow (stack, c);
720 c = get_effective_char(pfile);
727 num_part = NP_INT_SUFFIX;
731 if (NT_DEC == num_type)
737 obstack_1grow (stack, c);
738 c = get_effective_char(pfile);
741 if ('E' == c || 'e' == c)
743 if (has_whole || has_fract)
747 obstack_1grow (stack, c);
748 c = get_effective_char(pfile);
759 obstack_1grow (stack, c);
760 c = get_effective_char(pfile);
763 if ('P' == c || 'p' == c)
765 if (has_whole || has_fract)
769 obstack_1grow (stack, c);
770 c = get_effective_char(pfile);
775 num_part = NP_FLOAT_SUFFIX;
779 if ('+' == c || '-' == c)
782 obstack_1grow (stack, c);
783 c = get_effective_char(pfile);
789 obstack_1grow (stack, c);
790 c = get_effective_char(pfile);
793 num_part = NP_FLOAT_SUFFIX;
797 if ('L' == c || 'l' == c)
802 obstack_1grow (stack, c);
803 c = get_effective_char(pfile);
808 obstack_1grow (stack, c);
809 c = get_effective_char(pfile);
812 else if ('U' == c || 'u' == c)
815 obstack_1grow (stack, c);
816 c = get_effective_char(pfile);
820 case NP_FLOAT_SUFFIX:
821 if ('F' == c || 'f' == c)
824 obstack_1grow (stack, c);
825 c = get_effective_char(pfile);
827 else if ('L' == c || 'l' == c)
830 obstack_1grow (stack, c);
831 c = get_effective_char(pfile);
838 /* Step back over the unwanted char. */
841 number->text = obstack_finish (stack);
845 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
847 lex_number (cpp_reader *pfile, cpp_string *number,
848 struct normalize_state *nst)
854 base = pfile->buffer->cur - 1;
857 cur = pfile->buffer->cur;
859 /* N.B. ISIDNUM does not include $. */
860 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
863 NORMALIZE_STATE_UPDATE_IDNUM (nst);
866 pfile->buffer->cur = cur;
868 while (forms_identifier_p (pfile, false, nst));
870 number->len = cur - base;
871 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
872 memcpy (dest, base, number->len);
873 dest[number->len] = '\0';
877 /* Create a token of type TYPE with a literal spelling. */
879 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
880 unsigned int len, enum cpp_ttype type)
882 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
884 memcpy (dest, base, len);
887 token->val.str.len = len;
888 token->val.str.text = dest;
891 /* Lexes a string, character constant, or angle-bracketed header file
892 name. The stored string contains the spelling, including opening
893 quote and any leading 'L'. It returns the type of the
894 literal, or CPP_OTHER if it was not properly terminated.
896 The spelling is NUL-terminated, but it is not guaranteed that this
897 is the first NUL since embedded NULs are preserved. */
899 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
901 bool saw_NUL = false;
903 cppchar_t terminator;
908 if (terminator == 'L')
910 if (terminator == '\"')
911 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
912 else if (terminator == '\'')
913 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
915 terminator = '>', type = CPP_HEADER_NAME;
919 cppchar_t c = *cur++;
921 /* In #include-style directives, terminators are not escapable. */
922 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
924 else if (c == terminator)
936 if (saw_NUL && !pfile->state.skipping)
937 cpp_error (pfile, CPP_DL_WARNING,
938 "null character(s) preserved in literal");
940 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
941 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
944 pfile->buffer->cur = cur;
945 create_literal (pfile, token, base, cur - base, type);
948 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
949 comment blocks (when executed with -C option) and
950 _asm (SDCPP specific) blocks */
952 /* Count and copy characters from src to dest, excluding CRs:
953 CRs are automatically generated, because the output is
954 opened in TEXT mode. If dest == NULL, only count chars */
956 copy_text_chars (unsigned char *dest, const unsigned char *src, unsigned int len)
959 const unsigned char *p;
961 for (p = src; p != src + len; ++p)
976 /* SDCC _asm specific */
977 /* The stored block includes the initial '_asm' and any terminator. */
979 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
981 #define _ASM_STR "_asm"
982 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
984 unsigned char *buffer;
985 unsigned int text_len, len;
987 len = pfile->buffer->cur - from;
988 /* + _ASM_LEN for the initial '_asm'. */
989 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
990 buffer = _cpp_unaligned_alloc (pfile, text_len);
993 token->type = CPP_ASM;
994 token->val.str.len = text_len;
995 token->val.str.text = buffer;
997 memcpy (buffer, _ASM_STR, _ASM_LEN);
998 copy_text_chars (buffer + _ASM_LEN, from, len);
1001 /* The stored comment includes the comment start and any terminator. */
1003 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1006 unsigned char *buffer;
1007 unsigned int len, clen;
1009 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1011 /* C++ comments probably (not definitely) have moved past a new
1012 line, which we don't want to save in the comment. */
1013 if (is_vspace (pfile->buffer->cur[-1]))
1016 /* If we are currently in a directive, then we need to store all
1017 C++ comments as C comments internally, and so we need to
1018 allocate a little extra space in that case.
1020 Note that the only time we encounter a directive here is
1021 when we are saving comments in a "#define". */
1022 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1024 buffer = _cpp_unaligned_alloc (pfile, clen);
1026 token->type = CPP_COMMENT;
1027 token->val.str.len = clen;
1028 token->val.str.text = buffer;
1031 copy_text_chars (buffer + 1, from, len);
1033 /* Finish conversion to a C comment, if necessary. */
1034 if (pfile->state.in_directive && type == '/')
1037 buffer[clen - 2] = '*';
1038 buffer[clen - 1] = '/';
1042 /* Allocate COUNT tokens for RUN. */
1044 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1046 run->base = XNEWVEC (cpp_token, count);
1047 run->limit = run->base + count;
1051 /* Returns the next tokenrun, or creates one if there is none. */
1053 next_tokenrun (tokenrun *run)
1055 if (run->next == NULL)
1057 run->next = XNEW (tokenrun);
1058 run->next->prev = run;
1059 _cpp_init_tokenrun (run->next, 250);
1065 /* Allocate a single token that is invalidated at the same time as the
1066 rest of the tokens on the line. Has its line and col set to the
1067 same as the last lexed token, so that diagnostics appear in the
1070 _cpp_temp_token (cpp_reader *pfile)
1072 cpp_token *old, *result;
1074 old = pfile->cur_token - 1;
1075 if (pfile->cur_token == pfile->cur_run->limit)
1077 pfile->cur_run = next_tokenrun (pfile->cur_run);
1078 pfile->cur_token = pfile->cur_run->base;
1081 result = pfile->cur_token++;
1082 result->src_loc = old->src_loc;
1086 /* Lex a token into RESULT (external interface). Takes care of issues
1087 like directive handling, token lookahead, multiple include
1088 optimization and skipping. */
1090 _cpp_lex_token (cpp_reader *pfile)
1096 if (pfile->cur_token == pfile->cur_run->limit)
1098 pfile->cur_run = next_tokenrun (pfile->cur_run);
1099 pfile->cur_token = pfile->cur_run->base;
1102 if (pfile->lookaheads)
1104 pfile->lookaheads--;
1105 result = pfile->cur_token++;
1108 result = _cpp_lex_direct (pfile);
1110 if (result->flags & BOL)
1112 /* Is this a directive? If _cpp_handle_directive returns
1113 false, it is an assembler #. */
1114 if (result->type == CPP_HASH
1115 /* 6.10.3 p 11: Directives in a list of macro arguments
1116 gives undefined behavior. This implementation
1117 handles the directive as normal. */
1118 && pfile->state.parsing_args != 1)
1120 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1122 if (pfile->directive_result.type == CPP_PADDING)
1124 result = &pfile->directive_result;
1127 else if (pfile->state.in_deferred_pragma)
1128 result = &pfile->directive_result;
1130 if (pfile->cb.line_change && !pfile->state.skipping)
1131 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1134 /* We don't skip tokens in directives. */
1135 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1138 /* Outside a directive, invalidate controlling macros. At file
1139 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1140 get here and MI optimization works. */
1141 pfile->mi_valid = false;
1143 if (!pfile->state.skipping || result->type == CPP_EOF)
1150 /* Returns true if a fresh line has been loaded. */
1152 _cpp_get_fresh_line (cpp_reader *pfile)
1156 /* We can't get a new line until we leave the current directive. */
1157 if (pfile->state.in_directive)
1162 cpp_buffer *buffer = pfile->buffer;
1164 if (!buffer->need_line)
1167 if (buffer->next_line < buffer->rlimit)
1169 _cpp_clean_line (pfile);
1173 /* First, get out of parsing arguments state. */
1174 if (pfile->state.parsing_args)
1177 /* End of buffer. Non-empty files should end in a newline. */
1178 if (buffer->buf != buffer->rlimit
1179 && buffer->next_line > buffer->rlimit
1180 && !buffer->from_stage3)
1182 /* Only warn once. */
1183 buffer->next_line = buffer->rlimit;
1184 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1185 CPP_BUF_COLUMN (buffer, buffer->cur),
1186 "no newline at end of file");
1189 return_at_eof = buffer->return_at_eof;
1190 _cpp_pop_buffer (pfile);
1191 if (pfile->buffer == NULL || return_at_eof)
1196 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1199 result->type = ELSE_TYPE; \
1200 if (*buffer->cur == CHAR) \
1201 buffer->cur++, result->type = THEN_TYPE; \
1205 /* Lex a token into pfile->cur_token, which is also incremented, to
1206 get diagnostics pointing to the correct location.
1208 Does not handle issues such as token lookahead, multiple-include
1209 optimization, directives, skipping etc. This function is only
1210 suitable for use by _cpp_lex_token, and in special cases like
1211 lex_expansion_token which doesn't care for any of these issues.
1213 When meeting a newline, returns CPP_EOF if parsing a directive,
1214 otherwise returns to the start of the token buffer if permissible.
1215 Returns the location of the lexed token. */
1217 _cpp_lex_direct (cpp_reader *pfile)
1221 const unsigned char *comment_start;
1222 cpp_token *result = pfile->cur_token++;
1226 buffer = pfile->buffer;
1227 if (buffer->need_line)
1229 if (pfile->state.in_deferred_pragma)
1231 result->type = CPP_PRAGMA_EOL;
1232 pfile->state.in_deferred_pragma = false;
1233 if (!pfile->state.pragma_allow_expansion)
1234 pfile->state.prevent_expansion--;
1237 if (!_cpp_get_fresh_line (pfile))
1239 result->type = CPP_EOF;
1240 if (!pfile->state.in_directive)
1242 /* Tell the compiler the line number of the EOF token. */
1243 result->src_loc = pfile->line_table->highest_line;
1244 result->flags = BOL;
1248 if (!pfile->keep_tokens)
1250 pfile->cur_run = &pfile->base_run;
1251 result = pfile->base_run.base;
1252 pfile->cur_token = result + 1;
1254 result->flags = BOL;
1255 if (pfile->state.parsing_args == 2)
1256 result->flags |= PREV_WHITE;
1258 buffer = pfile->buffer;
1260 result->src_loc = pfile->line_table->highest_line;
1263 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1264 && !pfile->overlaid_buffer)
1266 _cpp_process_line_notes (pfile, false);
1267 result->src_loc = pfile->line_table->highest_line;
1271 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1272 CPP_BUF_COLUMN (buffer, buffer->cur));
1276 case ' ': case '\t': case '\f': case '\v': case '\0':
1277 result->flags |= PREV_WHITE;
1278 skip_whitespace (pfile, c);
1282 if (buffer->cur < buffer->rlimit)
1283 CPP_INCREMENT_LINE (pfile, 0);
1284 buffer->need_line = true;
1287 case '0': case '1': case '2': case '3': case '4':
1288 case '5': case '6': case '7': case '8': case '9':
1290 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1291 result->type = CPP_NUMBER;
1292 if (CPP_OPTION(pfile, pedantic_parse_number))
1293 pedantic_lex_number (pfile, &result->val.str);
1295 lex_number (pfile, &result->val.str, &nst);
1296 warn_about_normalization (pfile, result, &nst);
1301 /* 'L' may introduce wide characters or strings. */
1302 if (*buffer->cur == '\'' || *buffer->cur == '"')
1304 lex_string (pfile, result, buffer->cur - 1);
1310 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1311 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1312 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1313 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1315 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1316 case 'G': case 'H': case 'I': case 'J': case 'K':
1317 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1318 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1320 result->type = CPP_NAME;
1322 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1323 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1325 warn_about_normalization (pfile, result, &nst);
1328 /* SDCC _asm specific */
1329 /* handle _asm ... _endasm ; */
1330 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1332 comment_start = buffer->cur;
1333 result->type = CPP_ASM;
1334 skip_asm_block (pfile);
1335 /* Save the _asm block as a token in its own right. */
1336 save_asm (pfile, result, comment_start);
1338 /* Convert named operators to their proper types. */
1339 else if (result->val.node->flags & NODE_OPERATOR)
1341 result->flags |= NAMED_OP;
1342 result->type = (enum cpp_ttype) result->val.node->directive_index;
1348 lex_string (pfile, result, buffer->cur - 1);
1352 /* A potential block or line comment. */
1353 comment_start = buffer->cur;
1358 if (_cpp_skip_block_comment (pfile))
1359 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1361 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1362 || cpp_in_system_header (pfile)))
1364 /* Warn about comments only if pedantically GNUC89, and not
1365 in system headers. */
1366 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1367 && ! buffer->warned_cplusplus_comments)
1369 cpp_error (pfile, CPP_DL_PEDWARN,
1370 "C++ style comments are not allowed in ISO C90");
1371 cpp_error (pfile, CPP_DL_PEDWARN,
1372 "(this will be reported only once per input file)");
1373 buffer->warned_cplusplus_comments = 1;
1376 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1377 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1382 result->type = CPP_DIV_EQ;
1387 result->type = CPP_DIV;
1391 if (!pfile->state.save_comments)
1393 result->flags |= PREV_WHITE;
1394 goto update_tokens_line;
1397 /* Save the comment as a token in its own right. */
1398 save_comment (pfile, result, comment_start, c);
1402 if (pfile->state.angled_headers)
1404 lex_string (pfile, result, buffer->cur - 1);
1408 result->type = CPP_LESS;
1409 if (*buffer->cur == '=')
1410 buffer->cur++, result->type = CPP_LESS_EQ;
1411 else if (*buffer->cur == '<')
1414 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1416 else if (CPP_OPTION (pfile, digraphs))
1418 if (*buffer->cur == ':')
1421 result->flags |= DIGRAPH;
1422 result->type = CPP_OPEN_SQUARE;
1424 else if (*buffer->cur == '%')
1427 result->flags |= DIGRAPH;
1428 result->type = CPP_OPEN_BRACE;
1434 result->type = CPP_GREATER;
1435 if (*buffer->cur == '=')
1436 buffer->cur++, result->type = CPP_GREATER_EQ;
1437 else if (*buffer->cur == '>')
1440 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1445 result->type = CPP_MOD;
1446 if (*buffer->cur == '=')
1447 buffer->cur++, result->type = CPP_MOD_EQ;
1448 else if (CPP_OPTION (pfile, digraphs))
1450 if (*buffer->cur == ':')
1453 result->flags |= DIGRAPH;
1454 result->type = CPP_HASH;
1455 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1456 buffer->cur += 2, result->type = CPP_PASTE;
1458 else if (*buffer->cur == '>')
1461 result->flags |= DIGRAPH;
1462 result->type = CPP_CLOSE_BRACE;
1468 result->type = CPP_DOT;
1469 if (ISDIGIT (*buffer->cur))
1471 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1472 result->type = CPP_NUMBER;
1473 if (CPP_OPTION(pfile, pedantic_parse_number))
1474 pedantic_lex_number (pfile, &result->val.str);
1476 lex_number (pfile, &result->val.str, &nst);
1477 warn_about_normalization (pfile, result, &nst);
1479 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1480 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1481 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1482 buffer->cur++, result->type = CPP_DOT_STAR;
1486 result->type = CPP_PLUS;
1487 if (*buffer->cur == '+')
1488 buffer->cur++, result->type = CPP_PLUS_PLUS;
1489 else if (*buffer->cur == '=')
1490 buffer->cur++, result->type = CPP_PLUS_EQ;
1494 result->type = CPP_MINUS;
1495 if (*buffer->cur == '>')
1498 result->type = CPP_DEREF;
1499 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1500 buffer->cur++, result->type = CPP_DEREF_STAR;
1502 else if (*buffer->cur == '-')
1503 buffer->cur++, result->type = CPP_MINUS_MINUS;
1504 else if (*buffer->cur == '=')
1505 buffer->cur++, result->type = CPP_MINUS_EQ;
1509 result->type = CPP_AND;
1510 if (*buffer->cur == '&')
1511 buffer->cur++, result->type = CPP_AND_AND;
1512 else if (*buffer->cur == '=')
1513 buffer->cur++, result->type = CPP_AND_EQ;
1517 result->type = CPP_OR;
1518 if (*buffer->cur == '|')
1519 buffer->cur++, result->type = CPP_OR_OR;
1520 else if (*buffer->cur == '=')
1521 buffer->cur++, result->type = CPP_OR_EQ;
1525 result->type = CPP_COLON;
1526 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1527 buffer->cur++, result->type = CPP_SCOPE;
1528 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1531 result->flags |= DIGRAPH;
1532 result->type = CPP_CLOSE_SQUARE;
1536 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1537 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1538 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1539 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1540 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1542 case '?': result->type = CPP_QUERY; break;
1543 case '~': result->type = CPP_COMPL; break;
1544 case ',': result->type = CPP_COMMA; break;
1545 case '(': result->type = CPP_OPEN_PAREN; break;
1546 case ')': result->type = CPP_CLOSE_PAREN; break;
1547 case '[': result->type = CPP_OPEN_SQUARE; break;
1548 case ']': result->type = CPP_CLOSE_SQUARE; break;
1549 case '{': result->type = CPP_OPEN_BRACE; break;
1550 case '}': result->type = CPP_CLOSE_BRACE; break;
1551 case ';': result->type = CPP_SEMICOLON; break;
1553 /* @ is a punctuator in Objective-C. */
1554 case '@': result->type = CPP_ATSIGN; break;
1559 const uchar *base = --buffer->cur;
1560 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1562 if (forms_identifier_p (pfile, true, &nst))
1564 result->type = CPP_NAME;
1565 result->val.node = lex_identifier (pfile, base, true, &nst);
1566 warn_about_normalization (pfile, result, &nst);
1573 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1580 /* An upper bound on the number of bytes needed to spell TOKEN.
1581 Does not include preceding whitespace. */
1583 cpp_token_len (const cpp_token *token)
1587 switch (TOKEN_SPELL (token))
1589 default: len = 4; break;
1590 case SPELL_LITERAL: len = token->val.str.len; break;
1591 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
/* Decode the single UTF-8 sequence starting at NAME and store the
   matching universal character name at BUFFER: a backslash, 'U' and
   eight lowercase hexadecimal digits.  Exactly 10 bytes are written
   and no NUL is appended.  Returns the number of bytes of NAME that
   formed the sequence.  Aborts if a continuation byte is not of the
   form 10xxxxxx.  */
static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned char *out;
  unsigned int lead;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading 1 bits in the first byte gives the length
     of the whole sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Whatever follows the length marker in the first byte is payload.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes six more payload bits.  */
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  out = buffer;
  *out++ = '\\';
  *out++ = 'U';
  /* Most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1632 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1633 already contain the enough space to hold the token's spelling.
1634 Returns a pointer to the character after the last character written.
1635 FORSTRING is true if this is to be the spelling after translation
1636 phase 1 (this is different for UCNs).
1637 FIXME: Would be nice if we didn't need the PFILE argument. */
1639 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1640 unsigned char *buffer, bool forstring)
1642 switch (TOKEN_SPELL (token))
1644 case SPELL_OPERATOR:
1646 const unsigned char *spelling;
1649 if (token->flags & DIGRAPH)
1651 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1652 else if (token->flags & NAMED_OP)
1655 spelling = TOKEN_NAME (token);
1657 while ((c = *spelling++) != '\0')
1666 memcpy (buffer, NODE_NAME (token->val.node),
1667 NODE_LEN (token->val.node));
1668 buffer += NODE_LEN (token->val.node);
1673 const unsigned char * name = NODE_NAME (token->val.node);
1675 for (i = 0; i < NODE_LEN (token->val.node); i++)
1676 if (name[i] & ~0x7F)
1678 i += utf8_to_ucn (buffer, name + i) - 1;
1682 *buffer++ = NODE_NAME (token->val.node)[i];
1687 memcpy (buffer, token->val.str.text, token->val.str.len);
1688 buffer += token->val.str.len;
1692 cpp_error (pfile, CPP_DL_ICE,
1693 "unspellable token %s", TOKEN_NAME (token));
1700 /* Returns TOKEN spelt as a null-terminated string. The string is
1701 freed when the reader is destroyed. Useful for diagnostics. */
1703 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1705 unsigned int len = cpp_token_len (token) + 1;
1706 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1708 end = cpp_spell_token (pfile, token, start, false);
1714 /* Used by C front ends, which really should move to using
1715 cpp_token_as_text. */
1717 cpp_type2name (enum cpp_ttype type)
1719 return (const char *) token_spellings[type].name;
1722 /* Writes the spelling of token to FP, without any preceding space.
1723 Separated from cpp_spell_token for efficiency - to avoid stdio
1724 double-buffering. */
1726 cpp_output_token (const cpp_token *token, FILE *fp)
1728 switch (TOKEN_SPELL (token))
1730 case SPELL_OPERATOR:
1732 const unsigned char *spelling;
1735 if (token->flags & DIGRAPH)
1737 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1738 else if (token->flags & NAMED_OP)
1741 spelling = TOKEN_NAME (token);
1746 while ((c = *++spelling) != '\0');
1754 const unsigned char * name = NODE_NAME (token->val.node);
1756 for (i = 0; i < NODE_LEN (token->val.node); i++)
1757 if (name[i] & ~0x7F)
1759 unsigned char buffer[10];
1760 i += utf8_to_ucn (buffer, name + i) - 1;
1761 fwrite (buffer, 1, 10, fp);
1764 fputc (NODE_NAME (token->val.node)[i], fp);
1769 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1773 /* An error, most probably. */
1778 /* Compare two tokens. */
1780 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1782 if (a->type == b->type && a->flags == b->flags)
1783 switch (TOKEN_SPELL (a))
1785 default: /* Keep compiler happy. */
1786 case SPELL_OPERATOR:
1789 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1791 return a->val.node == b->val.node;
1793 return (a->val.str.len == b->val.str.len
1794 && !memcmp (a->val.str.text, b->val.str.text,
1801 /* Returns nonzero if a space should be inserted to avoid an
1802 accidental token paste for output. For simplicity, it is
1803 conservative, and occasionally advises a space where one is not
1804 needed, e.g. "." and ".2". */
1806 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1807 const cpp_token *token2)
1809 enum cpp_ttype a = token1->type, b = token2->type;
1812 if (token1->flags & NAMED_OP)
1814 if (token2->flags & NAMED_OP)
1818 if (token2->flags & DIGRAPH)
1819 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1820 else if (token_spellings[b].category == SPELL_OPERATOR)
1821 c = token_spellings[b].name[0];
1823 /* Quickly get everything that can paste with an '='. */
1824 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1829 case CPP_GREATER: return c == '>';
1830 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1831 case CPP_PLUS: return c == '+';
1832 case CPP_MINUS: return c == '-' || c == '>';
1833 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1834 case CPP_MOD: return c == ':' || c == '>';
1835 case CPP_AND: return c == '&';
1836 case CPP_OR: return c == '|';
1837 case CPP_COLON: return c == ':' || c == '>';
1838 case CPP_DEREF: return c == '*';
1839 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1840 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1841 case CPP_NAME: return ((b == CPP_NUMBER
1842 && name_p (pfile, &token2->val.str))
1844 || b == CPP_CHAR || b == CPP_STRING); /* L */
1845 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1846 || c == '.' || c == '+' || c == '-');
1848 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1850 || (CPP_OPTION (pfile, objc)
1851 && token1->val.str.text[0] == '@'
1852 && (b == CPP_NAME || b == CPP_STRING)));
1859 /* Output all the remaining tokens on the current line, and a newline
1860 character, to FP. Leading whitespace is removed. If there are
1861 macros, special token padding is not performed. */
1863 cpp_output_line (cpp_reader *pfile, FILE *fp)
1865 const cpp_token *token;
1867 token = cpp_get_token (pfile);
1868 while (token->type != CPP_EOF)
1870 cpp_output_token (token, fp);
1871 token = cpp_get_token (pfile);
1872 if (token->flags & PREV_WHITE)
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit pointers.  Every macro argument is parenthesized so that
   an argument expression cannot regroup with the surrounding
   arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1894 /* Create a new allocation buffer. Place the control block at the end
1895 of the buffer, so that buffer overflows will cause immediate chaos. */
1897 new_buff (size_t len)
1900 unsigned char *base;
1902 if (len < MIN_BUFF_SIZE)
1903 len = MIN_BUFF_SIZE;
1904 len = CPP_ALIGN (len);
1906 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1907 result = (_cpp_buff *) (base + len);
1908 result->base = base;
1910 result->limit = base + len;
1911 result->next = NULL;
1915 /* Place a chain of unwanted allocation buffers on the free list. */
1917 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1919 _cpp_buff *end = buff;
1923 end->next = pfile->free_buffs;
1924 pfile->free_buffs = buff;
1927 /* Return a free buffer of size at least MIN_SIZE. */
1929 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1931 _cpp_buff *result, **p;
1933 for (p = &pfile->free_buffs;; p = &(*p)->next)
1938 return new_buff (min_size);
1940 size = result->limit - result->base;
1941 /* Return a buffer that's big enough, but don't waste one that's
1943 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1948 result->next = NULL;
1949 result->cur = result->base;
1953 /* Creates a new buffer with enough space to hold the uncommitted
1954 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1955 the excess bytes to the new buffer. Chains the new buffer after
1956 BUFF, and returns the new buffer. */
1958 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1960 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1961 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1963 buff->next = new_buff;
1964 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1968 /* Creates a new buffer with enough space to hold the uncommitted
1969 remaining bytes of the buffer pointed to by BUFF, and at least
1970 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1971 Chains the new buffer before the buffer pointed to by BUFF, and
1972 updates the pointer to point to the new buffer. */
1974 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1976 _cpp_buff *new_buff, *old_buff = *pbuff;
1977 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1979 new_buff = _cpp_get_buff (pfile, size);
1980 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1981 new_buff->next = old_buff;
1985 /* Free a chain of buffers starting at BUFF. */
1987 _cpp_free_buff (_cpp_buff *buff)
1991 for (; buff; buff = next)
1998 /* Allocate permanent, unaligned storage of length LEN. */
2000 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2002 _cpp_buff *buff = pfile->u_buff;
2003 unsigned char *result = buff->cur;
2005 if (len > (size_t) (buff->limit - result))
2007 buff = _cpp_get_buff (pfile, len);
2008 buff->next = pfile->u_buff;
2009 pfile->u_buff = buff;
2013 buff->cur = result + len;
2017 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2018 That buffer is used for growing allocations when saving macro
2019 replacement lists in a #define, and when parsing an answer to an
2020 assertion in #assert, #unassert or #if (and therefore possibly
2021 whilst expanding macros). It therefore must not be used by any
2022 code that they might call: specifically the lexer and the guts of
2025 All existing other uses clearly fit this restriction: storing
2026 registered pragmas during initialization. */
2028 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2030 _cpp_buff *buff = pfile->a_buff;
2031 unsigned char *result = buff->cur;
2033 if (len > (size_t) (buff->limit - result))
2035 buff = _cpp_get_buff (pfile, len);
2036 buff->next = pfile->a_buff;
2037 pfile->a_buff = buff;
2041 buff->cur = result + len;
2045 /* Say which field of TOK is in use. */
2047 enum cpp_token_fld_kind
2048 cpp_token_val_index (cpp_token *tok)
2050 switch (TOKEN_SPELL (tok))
2053 return CPP_TOKEN_FLD_NODE;
2055 return CPP_TOKEN_FLD_STR;
2057 if (tok->type == CPP_MACRO_ARG)
2058 return CPP_TOKEN_FLD_ARG_NO;
2059 else if (tok->type == CPP_PADDING)
2060 return CPP_TOKEN_FLD_SOURCE;
2061 else if (tok->type == CPP_PRAGMA)
2062 return CPP_TOKEN_FLD_PRAGMA;
2063 /* else fall through */
2065 return CPP_TOKEN_FLD_NONE;