1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
38 enum spell_type category;
39 const unsigned char *name;
/* Spellings of the six digraph punctuators. The lexer code visible in
   this file treats them as alternative spellings of: # ("%:"),
   ## ("%:%:"), [ ("<:"), ] (":>"), { ("<%") and } ("%>").
   NOTE(review): U is presumably a cast-to-unsigned-char helper defined
   elsewhere; confirm. */
42 static const unsigned char *const digraph_spellings[] =
43 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
/* Initialiser generators for one table entry each: OP gives category
   SPELL_OPERATOR with the spelling S; TK gives category SPELL_<S> with
   the stringified enumerator E as its name. Presumably TTYPE_TABLE
   (defined elsewhere) is written in terms of these two; confirm. */
45 #define OP(e, s) { SPELL_OPERATOR, U s },
46 #define TK(e, s) { SPELL_ ## s, U #e },
/* Spelling table with N_TTYPES entries, initialised from TTYPE_TABLE. */
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
/* Fetch the spelling category and spelling name for TOKEN by indexing
   token_spellings with TOKEN's type. TOKEN is a pointer to a token. */
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of file-local (static) helpers. */
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
101 _cpp_clean_line (cpp_reader *pfile)
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 /* Short circuit for the common case of an un-escaped line with
116 no trigraphs. The primary win here is by not writing any
117 data back to memory until we have to. */
121 if (c == '\n' || c == '\r')
125 if (s == buffer->rlimit)
128 /* DOS line ending? */
129 if (c == '\r' && s[1] == '\n')
132 if (s == buffer->rlimit)
135 /* check for escaped newline */
137 while (p != buffer->next_line && is_nvspace (p[-1]))
139 if (p == buffer->next_line || p[-1] != '\\')
142 /* Have an escaped newline; process it and proceed to
144 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
146 buffer->next_line = p - 1;
149 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
151 /* Have a trigraph. We may or may not have to convert
152 it. Add a line note regardless, for -Wtrigraphs. */
153 add_line_note (buffer, s, s[2]);
154 if (CPP_OPTION (pfile, trigraphs))
156 /* We do, and that means we have to switch to the
159 *d = _cpp_trigraph_map[s[2]];
172 if (c == '\n' || c == '\r')
174 /* Handle DOS line endings. */
175 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
177 if (s == buffer->rlimit)
182 while (p != buffer->next_line && is_nvspace (p[-1]))
184 if (p == buffer->next_line || p[-1] != '\\')
187 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
189 buffer->next_line = p - 1;
191 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
193 /* Add a note regardless, for the benefit of -Wtrigraphs. */
194 add_line_note (buffer, d, s[2]);
195 if (CPP_OPTION (pfile, trigraphs))
197 *d = _cpp_trigraph_map[s[2]];
207 while (*s != '\n' && *s != '\r');
210 /* Handle DOS line endings. */
211 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
217 /* A sentinel note that should never be processed. */
218 add_line_note (buffer, d + 1, '\n');
219 buffer->next_line = s + 1;
222 /* Return true if the trigraph indicated by NOTE should be warned
223 about in a comment. */
225 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 /* Within comments we don't warn about trigraphs, unless the
230 trigraph forms an escaped newline, as that may change
232 if (note->type != '/')
235 /* If -trigraphs, then this was an escaped newline iff the next note
237 if (CPP_OPTION (pfile, trigraphs))
238 return note[1].pos == note->pos;
240 /* Otherwise, see if this forms an escaped newline. */
242 while (is_nvspace (*p))
245 /* There might have been escaped newlines between the trigraph and the
246 newline we found. Hence the position test. */
247 return (*p == '\n' && p < note[1].pos);
250 /* Process the notes created by add_line_note as far as the current
253 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
255 cpp_buffer *buffer = pfile->buffer;
259 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 if (note->pos > buffer->cur)
266 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
268 if (note->type == '\\' || note->type == ' ')
270 if (note->type == ' ' && !in_comment)
271 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
272 "backslash and newline separated by space");
274 if (buffer->next_line > buffer->rlimit)
276 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
277 "backslash-newline at end of file");
278 /* Prevent "no newline at end of file" warning. */
279 buffer->next_line = buffer->rlimit;
282 buffer->line_base = note->pos;
283 CPP_INCREMENT_LINE (pfile, 0);
285 else if (_cpp_trigraph_map[note->type])
287 if (CPP_OPTION (pfile, warn_trigraphs)
288 && (!in_comment || warn_in_comment (pfile, note)))
290 if (CPP_OPTION (pfile, trigraphs))
291 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
292 "trigraph ??%c converted to %c",
294 (int) _cpp_trigraph_map[note->type]);
298 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
299 "trigraph ??%c ignored, use -trigraphs to enable",
309 /* SDCC _asm specific */
310 /* Skip an _asm ... _endasm block. We find the end of the block by
311 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
314 skip_asm_block (cpp_reader *pfile)
316 #define _ENDASM_STR "endasm"
317 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
319 cpp_buffer *buffer = pfile->buffer;
324 while (buffer->cur != buffer->rlimit)
326 prev_space = is_space(c);
329 if (prev_space && c == '_')
331 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
332 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
334 buffer->cur += _ENDASM_LEN;
343 _cpp_process_line_notes (pfile, true);
344 if (buffer->next_line >= buffer->rlimit)
346 _cpp_clean_line (pfile);
348 cols = buffer->next_line - buffer->line_base;
349 CPP_INCREMENT_LINE (pfile, cols);
353 _cpp_process_line_notes (pfile, true);
357 /* Skip a C-style block comment. We find the end of the comment by
358 seeing if an asterisk is before every '/' we encounter. Returns
359 nonzero if comment terminated by EOF, zero otherwise.
361 Buffer->cur points to the initial asterisk of the comment. */
363 _cpp_skip_block_comment (cpp_reader *pfile)
365 cpp_buffer *buffer = pfile->buffer;
366 const uchar *cur = buffer->cur;
375 /* People like decorating comments with '*', so check for '/'
376 instead for efficiency. */
384 /* Warn about potential nested comments, but not if the '/'
385 comes immediately before the true comment delimiter.
386 Don't bother to get it right across escaped newlines. */
387 if (CPP_OPTION (pfile, warn_comments)
388 && cur[0] == '*' && cur[1] != '/')
391 cpp_error_with_line (pfile, CPP_DL_WARNING,
392 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
393 "\"/*\" within comment");
399 buffer->cur = cur - 1;
400 _cpp_process_line_notes (pfile, true);
401 if (buffer->next_line >= buffer->rlimit)
403 _cpp_clean_line (pfile);
405 cols = buffer->next_line - buffer->line_base;
406 CPP_INCREMENT_LINE (pfile, cols);
413 _cpp_process_line_notes (pfile, true);
417 /* Skip a C++ line comment, leaving buffer->cur pointing to the
418 terminating newline. Handles escaped newlines. Returns nonzero
419 if a multiline comment. */
421 skip_line_comment (cpp_reader *pfile)
423 cpp_buffer *buffer = pfile->buffer;
424 unsigned int orig_line = pfile->line_table->highest_line;
426 while (*buffer->cur != '\n')
429 _cpp_process_line_notes (pfile, true);
430 return orig_line != pfile->line_table->highest_line;
433 /* Skips whitespace, saving the next non-whitespace character. */
435 skip_whitespace (cpp_reader *pfile, cppchar_t c)
437 cpp_buffer *buffer = pfile->buffer;
438 bool saw_NUL = false;
442 /* Horizontal space always OK. */
443 if (c == ' ' || c == '\t')
445 /* Just \f \v or \0 left. */
448 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
449 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
450 CPP_BUF_COL (buffer),
451 "%s in preprocessing directive",
452 c == '\f' ? "form feed" : "vertical tab");
456 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
457 while (is_nvspace (c));
460 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
465 /* See if the characters of a number token are valid in a name (no
468 name_p (cpp_reader *pfile, const cpp_string *string)
472 for (i = 0; i < string->len; i++)
473 if (!is_idchar (string->text[i]))
479 /* After parsing an identifier or other sequence, produce a warning about
480 sequences not in NFC/NFKC. */
482 warn_about_normalization (cpp_reader *pfile,
483 const cpp_token *token,
484 const struct normalize_state *s)
486 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
487 && !pfile->state.skipping)
489 /* Make sure that the token is printed using UCNs, even
490 if we'd otherwise happily print UTF-8. */
491 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
494 sz = cpp_spell_token (pfile, token, buf, false) - buf;
495 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
496 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
497 "`%.*s' is not in NFKC", (int) sz, buf);
499 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
500 "`%.*s' is not in NFC", (int) sz, buf);
504 /* Returns TRUE if the sequence starting at buffer->cur is valid in
505 an identifier. FIRST is TRUE if this starts an identifier. */
507 forms_identifier_p (cpp_reader *pfile, int first,
508 struct normalize_state *state)
510 cpp_buffer *buffer = pfile->buffer;
512 if (*buffer->cur == '$')
514 if (!CPP_OPTION (pfile, dollars_in_ident))
518 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
520 CPP_OPTION (pfile, warn_dollars) = 0;
521 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
527 /* Is this a syntactically valid UCN? */
528 if (CPP_OPTION (pfile, extended_identifiers)
529 && *buffer->cur == '\\'
530 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
533 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
542 /* Lex an identifier starting at BUFFER->CUR - 1. */
543 static cpp_hashnode *
544 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
545 struct normalize_state *nst)
547 cpp_hashnode *result;
550 unsigned int hash = HT_HASHSTEP (0, *base);
552 cur = pfile->buffer->cur;
554 while (ISIDNUM (*cur))
556 hash = HT_HASHSTEP (hash, *cur);
559 pfile->buffer->cur = cur;
560 if (starts_ucn || forms_identifier_p (pfile, false, nst))
562 /* Slower version for identifiers containing UCNs (or $). */
564 while (ISIDNUM (*pfile->buffer->cur))
566 pfile->buffer->cur++;
567 NORMALIZE_STATE_UPDATE_IDNUM (nst);
569 } while (forms_identifier_p (pfile, false, nst));
570 result = _cpp_interpret_identifier (pfile, base,
571 pfile->buffer->cur - base);
576 hash = HT_HASHFINISH (hash, len);
578 result = (cpp_hashnode *)
579 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
582 /* Rarely, identifiers require diagnostics when lexed. */
583 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
584 && !pfile->state.skipping, 0))
586 /* It is allowed to poison the same identifier twice. */
587 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
588 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
591 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
592 replacement list of a variadic macro. */
593 if (result == pfile->spec_nodes.n__VA_ARGS__
594 && !pfile->state.va_args_ok)
595 cpp_error (pfile, CPP_DL_PEDWARN,
596 "__VA_ARGS__ can only appear in the expansion"
597 " of a C99 variadic macro");
604 /* Pedantically parse a number whose first character, a digit or a
605 leading ".", has already been consumed and is at BUFFER->CUR - 1.
606 Place the result in NUMBER. */
608 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
610 #define get_effective_char(pfile) (*pfile->buffer->cur++)
611 #define BACKUP() (--pfile->buffer->cur)
613 enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
614 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
616 uchar c = *(pfile->buffer->cur - 1);
617 struct obstack *stack = &pfile->hash_table->stack;
626 obstack_1grow (stack, '.');
627 c = get_effective_char(pfile);
635 obstack_1grow (stack, c);
636 c = get_effective_char(pfile);
644 obstack_1grow (stack, c);
645 c = get_effective_char(pfile);
651 obstack_1grow (stack, c);
652 c = get_effective_char(pfile);
663 if (NT_DEC == num_type)
669 obstack_1grow (stack, c);
670 c = get_effective_char(pfile);
677 obstack_1grow (stack, c);
678 c = get_effective_char(pfile);
681 else if ('E' == c || 'e' == c)
683 if (has_whole || has_fract)
687 obstack_1grow (stack, c);
688 c = get_effective_char(pfile);
701 obstack_1grow (stack, c);
702 c = get_effective_char(pfile);
709 obstack_1grow (stack, c);
710 c = get_effective_char(pfile);
713 else if ('P' == c || 'p' == c)
715 if (has_whole || has_fract)
719 obstack_1grow (stack, c);
720 c = get_effective_char(pfile);
727 num_part = NP_INT_SUFFIX;
731 if (NT_DEC == num_type)
737 obstack_1grow (stack, c);
738 c = get_effective_char(pfile);
741 if ('E' == c || 'e' == c)
743 if (has_whole || has_fract)
747 obstack_1grow (stack, c);
748 c = get_effective_char(pfile);
759 obstack_1grow (stack, c);
760 c = get_effective_char(pfile);
763 if ('P' == c || 'p' == c)
765 if (has_whole || has_fract)
769 obstack_1grow (stack, c);
770 c = get_effective_char(pfile);
775 num_part = NP_FLOAT_SUFFIX;
779 if ('+' == c || '-' == c)
782 obstack_1grow (stack, c);
783 c = get_effective_char(pfile);
789 obstack_1grow (stack, c);
790 c = get_effective_char(pfile);
793 num_part = NP_FLOAT_SUFFIX;
797 if ('L' == c || 'l' == c)
802 obstack_1grow (stack, c);
803 c = get_effective_char(pfile);
808 obstack_1grow (stack, c);
809 c = get_effective_char(pfile);
812 else if ('U' == c || 'u' == c)
815 obstack_1grow (stack, c);
816 c = get_effective_char(pfile);
820 case NP_FLOAT_SUFFIX:
821 if ('F' == c || 'f' == c)
824 obstack_1grow (stack, c);
825 c = get_effective_char(pfile);
827 else if ('L' == c || 'l' == c)
830 obstack_1grow (stack, c);
831 c = get_effective_char(pfile);
838 /* Step back over the unwanted char. */
841 number->text = obstack_finish (stack);
845 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
847 lex_number (cpp_reader *pfile, cpp_string *number,
848 struct normalize_state *nst)
854 base = pfile->buffer->cur - 1;
857 cur = pfile->buffer->cur;
859 /* N.B. ISIDNUM does not include $. */
860 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
863 NORMALIZE_STATE_UPDATE_IDNUM (nst);
866 pfile->buffer->cur = cur;
868 while (forms_identifier_p (pfile, false, nst));
870 number->len = cur - base;
871 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
872 memcpy (dest, base, number->len);
873 dest[number->len] = '\0';
877 /* Create a token of type TYPE with a literal spelling. */
879 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
880 unsigned int len, enum cpp_ttype type)
882 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
884 memcpy (dest, base, len);
887 token->val.str.len = len;
888 token->val.str.text = dest;
891 /* Lexes a string, character constant, or angle-bracketed header file
892 name. The stored string contains the spelling, including opening
893 quote and any leading 'L'. The token's type is set to that of the
894 literal, or CPP_OTHER if it was not properly terminated.
896 The spelling is NUL-terminated, but it is not guaranteed that this
897 is the first NUL since embedded NULs are preserved. */
899 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
901 bool saw_NUL = false;
903 cppchar_t terminator;
908 if (terminator == 'L')
910 if (terminator == '\"')
911 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
912 else if (terminator == '\'')
913 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
915 terminator = '>', type = CPP_HEADER_NAME;
919 cppchar_t c = *cur++;
921 /* In #include-style directives, terminators are not escapable. */
922 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
924 else if (c == terminator)
936 if (saw_NUL && !pfile->state.skipping)
937 cpp_error (pfile, CPP_DL_WARNING,
938 "null character(s) preserved in literal");
940 pfile->buffer->cur = cur;
941 create_literal (pfile, token, base, cur - base, type);
944 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
945 comment blocks (when executed with -C option) and
946 _asm (SDCPP specific) blocks */
948 /* Count and copy characters from src to dest, excluding CRs:
949 CRs are automatically generated, because the output is
950 opened in TEXT mode. If dest == NULL, only count chars */
952 copy_text_chars (char *dest, const char *src, unsigned int len)
957 for (p = src; p != src + len; ++p)
972 /* SDCC _asm specific */
973 /* The stored text includes the initial '_asm' and any terminator. */
975 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
977 #define _ASM_STR "_asm"
978 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
980 unsigned char *buffer;
981 unsigned int text_len, len;
983 len = pfile->buffer->cur - from;
984 /* + _ASM_LEN for the initial '_asm'. */
985 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
986 buffer = _cpp_unaligned_alloc (pfile, text_len);
989 token->type = CPP_ASM;
990 token->val.str.len = text_len;
991 token->val.str.text = buffer;
993 memcpy (buffer, _ASM_STR, _ASM_LEN);
994 copy_text_chars (buffer + _ASM_LEN, from, len);
997 /* The stored comment includes the comment start and any terminator. */
999 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1002 unsigned char *buffer;
1003 unsigned int len, clen;
1005 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1007 /* C++ comments probably (not definitely) have moved past a new
1008 line, which we don't want to save in the comment. */
1009 if (is_vspace (pfile->buffer->cur[-1]))
1012 /* If we are currently in a directive, then we need to store all
1013 C++ comments as C comments internally, and so we need to
1014 allocate a little extra space in that case.
1016 Note that the only time we encounter a directive here is
1017 when we are saving comments in a "#define". */
1018 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1020 buffer = _cpp_unaligned_alloc (pfile, clen);
1022 token->type = CPP_COMMENT;
1023 token->val.str.len = clen;
1024 token->val.str.text = buffer;
1027 copy_text_chars (buffer + 1, from, len);
1029 /* Finish conversion to a C comment, if necessary. */
1030 if (pfile->state.in_directive && type == '/')
1033 buffer[clen - 2] = '*';
1034 buffer[clen - 1] = '/';
1038 /* Allocate COUNT tokens for RUN. */
1040 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1042 run->base = XNEWVEC (cpp_token, count);
1043 run->limit = run->base + count;
1047 /* Returns the next tokenrun, or creates one if there is none. */
1049 next_tokenrun (tokenrun *run)
1051 if (run->next == NULL)
1053 run->next = XNEW (tokenrun);
1054 run->next->prev = run;
1055 _cpp_init_tokenrun (run->next, 250);
1061 /* Allocate a single token that is invalidated at the same time as the
1062 rest of the tokens on the line. Has its line and col set to the
1063 same as the last lexed token, so that diagnostics appear in the
1066 _cpp_temp_token (cpp_reader *pfile)
1068 cpp_token *old, *result;
1070 old = pfile->cur_token - 1;
1071 if (pfile->cur_token == pfile->cur_run->limit)
1073 pfile->cur_run = next_tokenrun (pfile->cur_run);
1074 pfile->cur_token = pfile->cur_run->base;
1077 result = pfile->cur_token++;
1078 result->src_loc = old->src_loc;
1082 /* Lex a token into RESULT (external interface). Takes care of issues
1083 like directive handling, token lookahead, multiple include
1084 optimization and skipping. */
1086 _cpp_lex_token (cpp_reader *pfile)
1092 if (pfile->cur_token == pfile->cur_run->limit)
1094 pfile->cur_run = next_tokenrun (pfile->cur_run);
1095 pfile->cur_token = pfile->cur_run->base;
1098 if (pfile->lookaheads)
1100 pfile->lookaheads--;
1101 result = pfile->cur_token++;
1104 result = _cpp_lex_direct (pfile);
1106 if (result->flags & BOL)
1108 /* Is this a directive. If _cpp_handle_directive returns
1109 false, it is an assembler #. */
1110 if (result->type == CPP_HASH
1111 /* 6.10.3 p 11: Directives in a list of macro arguments
1112 gives undefined behavior. This implementation
1113 handles the directive as normal. */
1114 && pfile->state.parsing_args != 1
1115 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1117 if (pfile->directive_result.type == CPP_PADDING)
1121 result = &pfile->directive_result;
1126 if (pfile->cb.line_change && !pfile->state.skipping)
1127 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1130 /* We don't skip tokens in directives. */
1131 if (pfile->state.in_directive)
1134 /* Outside a directive, invalidate controlling macros. At file
1135 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1136 get here and MI optimization works. */
1137 pfile->mi_valid = false;
1139 if (!pfile->state.skipping || result->type == CPP_EOF)
1146 /* Returns true if a fresh line has been loaded. */
1148 _cpp_get_fresh_line (cpp_reader *pfile)
1152 /* We can't get a new line until we leave the current directive. */
1153 if (pfile->state.in_directive)
1158 cpp_buffer *buffer = pfile->buffer;
1160 if (!buffer->need_line)
1163 if (buffer->next_line < buffer->rlimit)
1165 _cpp_clean_line (pfile);
1169 /* First, get out of parsing arguments state. */
1170 if (pfile->state.parsing_args)
1173 /* End of buffer. Non-empty files should end in a newline. */
1174 if (buffer->buf != buffer->rlimit
1175 && buffer->next_line > buffer->rlimit
1176 && !buffer->from_stage3)
1178 /* Only warn once. */
1179 buffer->next_line = buffer->rlimit;
1180 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1181 CPP_BUF_COLUMN (buffer, buffer->cur),
1182 "no newline at end of file");
1185 return_at_eof = buffer->return_at_eof;
1186 _cpp_pop_buffer (pfile);
1187 if (pfile->buffer == NULL || return_at_eof)
1192 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1195 result->type = ELSE_TYPE; \
1196 if (*buffer->cur == CHAR) \
1197 buffer->cur++, result->type = THEN_TYPE; \
1201 /* Lex a token into pfile->cur_token, which is also incremented, to
1202 get diagnostics pointing to the correct location.
1204 Does not handle issues such as token lookahead, multiple-include
1205 optimization, directives, skipping etc. This function is only
1206 suitable for use by _cpp_lex_token, and in special cases like
1207 lex_expansion_token which doesn't care for any of these issues.
1209 When meeting a newline, returns CPP_EOF if parsing a directive,
1210 otherwise returns to the start of the token buffer if permissible.
1211 Returns the location of the lexed token. */
1213 _cpp_lex_direct (cpp_reader *pfile)
1217 const unsigned char *comment_start;
1218 cpp_token *result = pfile->cur_token++;
1222 buffer = pfile->buffer;
1223 if (buffer->need_line)
1225 if (!_cpp_get_fresh_line (pfile))
1227 result->type = CPP_EOF;
1228 if (!pfile->state.in_directive)
1230 /* Tell the compiler the line number of the EOF token. */
1231 result->src_loc = pfile->line_table->highest_line;
1232 result->flags = BOL;
1236 if (!pfile->keep_tokens)
1238 pfile->cur_run = &pfile->base_run;
1239 result = pfile->base_run.base;
1240 pfile->cur_token = result + 1;
1242 result->flags = BOL;
1243 if (pfile->state.parsing_args == 2)
1244 result->flags |= PREV_WHITE;
1246 buffer = pfile->buffer;
1248 result->src_loc = pfile->line_table->highest_line;
1251 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1252 && !pfile->overlaid_buffer)
1254 _cpp_process_line_notes (pfile, false);
1255 result->src_loc = pfile->line_table->highest_line;
1259 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1260 CPP_BUF_COLUMN (buffer, buffer->cur));
1264 case ' ': case '\t': case '\f': case '\v': case '\0':
1265 result->flags |= PREV_WHITE;
1266 skip_whitespace (pfile, c);
1270 if (buffer->cur < buffer->rlimit)
1271 CPP_INCREMENT_LINE (pfile, 0);
1272 buffer->need_line = true;
1275 case '0': case '1': case '2': case '3': case '4':
1276 case '5': case '6': case '7': case '8': case '9':
1278 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1279 result->type = CPP_NUMBER;
1280 if (CPP_OPTION(pfile, pedantic_parse_number))
1281 pedantic_lex_number (pfile, &result->val.str);
1283 lex_number (pfile, &result->val.str, &nst);
1284 warn_about_normalization (pfile, result, &nst);
1289 /* 'L' may introduce wide characters or strings. */
1290 if (*buffer->cur == '\'' || *buffer->cur == '"')
1292 lex_string (pfile, result, buffer->cur - 1);
1298 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1299 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1300 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1301 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1303 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1304 case 'G': case 'H': case 'I': case 'J': case 'K':
1305 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1306 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1308 result->type = CPP_NAME;
1310 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1311 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1313 warn_about_normalization (pfile, result, &nst);
1316 /* SDCC _asm specific */
1317 /* handle _asm ... _endasm ; */
1318 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1320 comment_start = buffer->cur;
1321 result->type = CPP_ASM;
1322 skip_asm_block (pfile);
1323 /* Save the _asm block as a token in its own right. */
1324 save_asm (pfile, result, comment_start);
1326 /* Convert named operators to their proper types. */
1327 else if (result->val.node->flags & NODE_OPERATOR)
1329 result->flags |= NAMED_OP;
1330 result->type = (enum cpp_ttype) result->val.node->directive_index;
1336 lex_string (pfile, result, buffer->cur - 1);
1340 /* A potential block or line comment. */
1341 comment_start = buffer->cur;
1346 if (_cpp_skip_block_comment (pfile))
1347 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1349 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1350 || cpp_in_system_header (pfile)))
1352 /* Warn about comments only if pedantically GNUC89, and not
1353 in system headers. */
1354 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1355 && ! buffer->warned_cplusplus_comments)
1357 cpp_error (pfile, CPP_DL_PEDWARN,
1358 "C++ style comments are not allowed in ISO C90");
1359 cpp_error (pfile, CPP_DL_PEDWARN,
1360 "(this will be reported only once per input file)");
1361 buffer->warned_cplusplus_comments = 1;
1364 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1365 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1370 result->type = CPP_DIV_EQ;
1375 result->type = CPP_DIV;
1379 if (!pfile->state.save_comments)
1381 result->flags |= PREV_WHITE;
1382 goto update_tokens_line;
1385 /* Save the comment as a token in its own right. */
1386 save_comment (pfile, result, comment_start, c);
1390 if (pfile->state.angled_headers)
1392 lex_string (pfile, result, buffer->cur - 1);
1396 result->type = CPP_LESS;
1397 if (*buffer->cur == '=')
1398 buffer->cur++, result->type = CPP_LESS_EQ;
1399 else if (*buffer->cur == '<')
1402 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1404 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1407 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1409 else if (CPP_OPTION (pfile, digraphs))
1411 if (*buffer->cur == ':')
1414 result->flags |= DIGRAPH;
1415 result->type = CPP_OPEN_SQUARE;
1417 else if (*buffer->cur == '%')
1420 result->flags |= DIGRAPH;
1421 result->type = CPP_OPEN_BRACE;
1427 result->type = CPP_GREATER;
1428 if (*buffer->cur == '=')
1429 buffer->cur++, result->type = CPP_GREATER_EQ;
1430 else if (*buffer->cur == '>')
1433 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1435 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1438 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1443 result->type = CPP_MOD;
1444 if (*buffer->cur == '=')
1445 buffer->cur++, result->type = CPP_MOD_EQ;
1446 else if (CPP_OPTION (pfile, digraphs))
1448 if (*buffer->cur == ':')
1451 result->flags |= DIGRAPH;
1452 result->type = CPP_HASH;
1453 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1454 buffer->cur += 2, result->type = CPP_PASTE;
1456 else if (*buffer->cur == '>')
1459 result->flags |= DIGRAPH;
1460 result->type = CPP_CLOSE_BRACE;
1466 result->type = CPP_DOT;
1467 if (ISDIGIT (*buffer->cur))
1469 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1470 result->type = CPP_NUMBER;
1471 if (CPP_OPTION(pfile, pedantic_parse_number))
1472 pedantic_lex_number (pfile, &result->val.str);
1474 lex_number (pfile, &result->val.str, &nst);
1475 warn_about_normalization (pfile, result, &nst);
1477 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1478 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1479 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1480 buffer->cur++, result->type = CPP_DOT_STAR;
1484 result->type = CPP_PLUS;
1485 if (*buffer->cur == '+')
1486 buffer->cur++, result->type = CPP_PLUS_PLUS;
1487 else if (*buffer->cur == '=')
1488 buffer->cur++, result->type = CPP_PLUS_EQ;
1492 result->type = CPP_MINUS;
1493 if (*buffer->cur == '>')
1496 result->type = CPP_DEREF;
1497 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1498 buffer->cur++, result->type = CPP_DEREF_STAR;
1500 else if (*buffer->cur == '-')
1501 buffer->cur++, result->type = CPP_MINUS_MINUS;
1502 else if (*buffer->cur == '=')
1503 buffer->cur++, result->type = CPP_MINUS_EQ;
1507 result->type = CPP_AND;
1508 if (*buffer->cur == '&')
1509 buffer->cur++, result->type = CPP_AND_AND;
1510 else if (*buffer->cur == '=')
1511 buffer->cur++, result->type = CPP_AND_EQ;
1515 result->type = CPP_OR;
1516 if (*buffer->cur == '|')
1517 buffer->cur++, result->type = CPP_OR_OR;
1518 else if (*buffer->cur == '=')
1519 buffer->cur++, result->type = CPP_OR_EQ;
1523 result->type = CPP_COLON;
1524 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1525 buffer->cur++, result->type = CPP_SCOPE;
1526 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1529 result->flags |= DIGRAPH;
1530 result->type = CPP_CLOSE_SQUARE;
1534 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1535 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1536 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1537 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1538 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1540 case '?': result->type = CPP_QUERY; break;
1541 case '~': result->type = CPP_COMPL; break;
1542 case ',': result->type = CPP_COMMA; break;
1543 case '(': result->type = CPP_OPEN_PAREN; break;
1544 case ')': result->type = CPP_CLOSE_PAREN; break;
1545 case '[': result->type = CPP_OPEN_SQUARE; break;
1546 case ']': result->type = CPP_CLOSE_SQUARE; break;
1547 case '{': result->type = CPP_OPEN_BRACE; break;
1548 case '}': result->type = CPP_CLOSE_BRACE; break;
1549 case ';': result->type = CPP_SEMICOLON; break;
1551 /* @ is a punctuator in Objective-C. */
1552 case '@': result->type = CPP_ATSIGN; break;
1557 const uchar *base = --buffer->cur;
1558 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1560 if (forms_identifier_p (pfile, true, &nst))
1562 result->type = CPP_NAME;
1563 result->val.node = lex_identifier (pfile, base, true, &nst);
1564 warn_about_normalization (pfile, result, &nst);
1571 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1578 /* An upper bound on the number of bytes needed to spell TOKEN.
1579 Does not include preceding whitespace. */
/* The bound depends only on the spelling category of TOKEN
   (TOKEN_SPELL): the stored length for literals, ten times the stored
   length for identifiers, and a constant for everything else.
   cpp_token_as_text adds one to this value before allocating.
   NOTE(review): this listing omits some short lines of the function,
   so the declaration of LEN and the way it is handed back are not
   visible here.  */
1581 cpp_token_len (const cpp_token *token)
1585 switch (TOKEN_SPELL (token))
/* Constant bound for categories that carry no length of their own.
   NOTE(review): cpp_spell_token tests NAMED_OP on operator tokens
   separately from the table spelling; confirm that 4 bytes is enough
   for however those tokens end up being spelt.  */
1587 default: len = 4; break;
/* Literals are spelt by copying val.str verbatim, so its length is
   the bound.  */
1588 case SPELL_LITERAL: len = token->val.str.len; break;
/* Ten output bytes per stored byte: utf8_to_ucn writes a 10-byte
   escape for a UTF-8 sequence, so this covers the worst case.  */
1589 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1595 /* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
1596 Return the number of bytes read out of NAME. (There are always
1597 10 bytes written to BUFFER.) */
/* NAME is read starting at the lead byte of one UTF-8 sequence, and
   BUFFER needs room for all 10 output bytes.
   NOTE(review): several short lines of this function are missing from
   this listing (local declarations, the body of the length loop, the
   action taken on ill-formed input, the start of the escape, and the
   return), so only the visible statements are described below.  */
1600 utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
1606 unsigned long utf32;
1608 /* Compute the length of the UTF-8 sequence. */
/* T starts as the lead byte and is shifted left once per pass; the
   loop continues while bit 7 is set, i.e. once for each leading 1 bit
   of the lead byte.  */
1609 for (t = *name; t & 0x80; t <<= 1)
/* Strip the length-marker bits from the lead byte, keeping only its
   payload bits as the start of the code point.  */
1612 utf32 = *name & (0x7F >> ucn_len);
/* Each of the remaining ucn_len - 1 bytes contributes its low six
   bits.  */
1613 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
1615 utf32 = (utf32 << 6) | (*++name & 0x3F);
1617 /* Ill-formed UTF-8. */
/* A continuation byte must have its top two bits equal to 10.  */
1618 if ((*name & ~0x3F) != 0x80)
/* Emit the low 32 bits of the code point as eight lowercase hex
   digits, most significant nibble first.  */
1624 for (j = 7; j >= 0; j--)
1625 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
1630 /* Write the spelling of token TOKEN to BUFFER. The buffer must
1631 already contain enough space to hold the token's spelling.
1632 Returns a pointer to the character after the last character written.
1633 FORSTRING is true if this is to be the spelling after translation
1634 phase 1 (this is different for UCNs).
1635 FIXME: Would be nice if we didn't need the PFILE argument. */
/* None of the visible statements writes a terminating NUL.
   cpp_token_len gives an upper bound suitable for sizing BUFFER.  In
   the visible code PFILE is used only to report an unspellable token.
   NOTE(review): this listing omits some short lines (several case
   labels, the branch taken for NAMED_OP, the test on FORSTRING and
   the loop bodies), so the notes below describe only the statements
   that are shown.  */
1637 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1638 unsigned char *buffer, bool forstring)
1640 switch (TOKEN_SPELL (token))
1642 case SPELL_OPERATOR:
1644 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their spelling from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH.  */
1647 if (token->flags & DIGRAPH)
1649 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* NOTE(review): the statement executed for NAMED_OP tokens is not
   shown in this listing.  */
1650 else if (token->flags & NAMED_OP)
/* Otherwise use the spelling recorded in token_spellings.  */
1653 spelling = TOKEN_NAME (token);
/* Walk the spelling up to its terminating NUL.  */
1655 while ((c = *spelling++) != '\0')
/* Copy the stored bytes of the identifier unchanged and advance
   BUFFER past them.  NOTE(review): presumably this is the FORSTRING
   path; the governing condition is not shown.  */
1664 memcpy (buffer, NODE_NAME (token->val.node),
1665 NODE_LEN (token->val.node));
1666 buffer += NODE_LEN (token->val.node);
1671 const unsigned char * name = NODE_NAME (token->val.node);
/* Walk the identifier byte by byte.  A byte with the high bit set is
   handed to utf8_to_ucn, which writes an escape and reports how many
   input bytes it used; the - 1 compensates for the i++ of the loop.  */
1673 for (i = 0; i < NODE_LEN (token->val.node); i++)
1674 if (name[i] & ~0x7F)
1676 i += utf8_to_ucn (buffer, name + i) - 1;
/* Any other byte is copied through unchanged.  */
1680 *buffer++ = NODE_NAME (token->val.node)[i];
/* Literal text is copied verbatim from val.str.  */
1685 memcpy (buffer, token->val.str.text, token->val.str.len);
1686 buffer += token->val.str.len;
/* Report an internal error (CPP_DL_ICE) naming the token type.
   NOTE(review): the case label for this arm is not shown.  */
1690 cpp_error (pfile, CPP_DL_ICE,
1691 "unspellable token %s", TOKEN_NAME (token));
1698 /* Returns TOKEN spelt as a null-terminated string. The string is
1699 freed when the reader is destroyed. Useful for diagnostics. */
/* NOTE(review): the statements that store the terminator and return
   the string are not shown in this listing.  */
1701 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
/* One byte more than the spelling bound; presumably the extra byte is
   for the terminator.  */
1703 unsigned int len = cpp_token_len (token) + 1;
/* The storage is carved from the u_buff chain of PFILE by
   _cpp_unaligned_alloc rather than obtained with a separate
   allocation.  */
1704 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* Spell with FORSTRING false; END is the position just past the last
   byte written.  */
1706 end = cpp_spell_token (pfile, token, start, false);
1712 /* Used by C front ends, which really should move to using
1713 cpp_token_as_text. */
/* Return the name recorded for token type TYPE in the token_spellings
   table, viewed as a plain char string.  TYPE is used as the index
   without any range check, so it must be a valid index into that
   table (declared with N_TTYPES entries).  */
1715 cpp_type2name (enum cpp_ttype type)
1717 return (const char *) token_spellings[type].name;
1720 /* Writes the spelling of token to FP, without any preceding space.
1721 Separated from cpp_spell_token for efficiency - to avoid stdio
1722 double-buffering. */
/* The structure mirrors cpp_spell_token, but bytes go straight to FP.
   None of the visible stdio calls has its result checked.
   NOTE(review): this listing omits some short lines (several case
   labels, the branch taken for NAMED_OP and the statement that writes
   each operator character), so the notes below describe only the
   statements that are shown.  */
1724 cpp_output_token (const cpp_token *token, FILE *fp)
1726 switch (TOKEN_SPELL (token))
1728 case SPELL_OPERATOR:
1730 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their spelling from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH.  */
1733 if (token->flags & DIGRAPH)
1735 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* NOTE(review): the statement executed for NAMED_OP tokens is not
   shown in this listing.  */
1736 else if (token->flags & NAMED_OP)
/* Otherwise use the spelling recorded in token_spellings.  */
1739 spelling = TOKEN_NAME (token);
/* Advance first, then test for the terminating NUL.  NOTE(review):
   presumably the tail of a do/while whose body is not shown.  */
1744 while ((c = *++spelling) != '\0');
1752 const unsigned char * name = NODE_NAME (token->val.node);
/* Walk the identifier byte by byte.  A byte with the high bit set is
   converted by utf8_to_ucn into a 10-byte escape held in a local
   array and written with one fwrite; the - 1 compensates for the i++
   of the loop.  */
1754 for (i = 0; i < NODE_LEN (token->val.node); i++)
1755 if (name[i] & ~0x7F)
1757 unsigned char buffer[10];
1758 i += utf8_to_ucn (buffer, name + i) - 1;
1759 fwrite (buffer, 1, 10, fp);
/* Any other byte is written unchanged.  */
1762 fputc (NODE_NAME (token->val.node)[i], fp);
/* Literal text is written verbatim from val.str.  */
1767 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1771 /* An error, most probably. */
1776 /* Compare two tokens. */
/* Tokens are only candidates for equivalence when both their type and
   their flags match; what else is compared depends on the spelling
   category of A.
   NOTE(review): this listing omits the case labels of some arms, the
   value returned for the operator arm, the value returned when type
   or flags differ, and the last line of the final comparison.  */
1778 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1780 if (a->type == b->type && a->flags == b->flags)
1781 switch (TOKEN_SPELL (a))
1783 default: /* Keep compiler happy. */
1784 case SPELL_OPERATOR:
/* A CPP_MACRO_ARG token additionally needs the same argument number;
   any other token reaching this statement needs nothing further.  */
1787 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compare the val.node pointers for identity.  */
1789 return a->val.node == b->val.node;
/* Compare val.str: the lengths must match and memcmp must report the
   text as equal.  */
1791 return (a->val.str.len == b->val.str.len
1792 && !memcmp (a->val.str.text, b->val.str.text,
1799 /* Returns nonzero if a space should be inserted to avoid an
1800 accidental token paste for output. For simplicity, it is
1801 conservative, and occasionally advises a space where one is not
1802 needed, e.g. "." and ".2". */
/* A and B start as the types of TOKEN1 and TOKEN2.  C receives the
   first character of the spelling of TOKEN2 when TOKEN2 is flagged
   DIGRAPH or its type has an operator spelling.
   NOTE(review): this listing omits the statements executed when
   NAMED_OP is set, the declaration and initial value of C, the result
   of the quick '=' test, the switch header, and the default result,
   plus one line inside each of the CPP_NAME and CPP_OTHER
   expressions.  */
1804 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1805 const cpp_token *token2)
1807 enum cpp_ttype a = token1->type, b = token2->type;
1810 if (token1->flags & NAMED_OP)
1812 if (token2->flags & NAMED_OP)
/* A digraph token is judged by the first character of its digraph
   spelling, not of its ordinary spelling.  */
1816 if (token2->flags & DIGRAPH)
1817 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1818 else if (token_spellings[b].category == SPELL_OPERATOR)
1819 c = token_spellings[b].name[0];
1821 /* Quickly get everything that can paste with an '='. */
1822 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* One arm per left-hand token type, each listing the right-hand first
   characters (or right-hand token types) for which a space is
   advised.  */
1827 case CPP_GREATER: return c == '>' || c == '?';
1828 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1829 case CPP_PLUS: return c == '+';
1830 case CPP_MINUS: return c == '-' || c == '>';
1831 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1832 case CPP_MOD: return c == ':' || c == '>';
1833 case CPP_AND: return c == '&';
1834 case CPP_OR: return c == '|';
1835 case CPP_COLON: return c == ':' || c == '>';
1836 case CPP_DEREF: return c == '*';
1837 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1838 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* After a name: a number is checked with name_p, while a character
   or string constant always asks for a space.  */
1839 case CPP_NAME: return ((b == CPP_NUMBER
1840 && name_p (pfile, &token2->val.str))
1842 || b == CPP_CHAR || b == CPP_STRING); /* L */
1843 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1844 || c == '.' || c == '+' || c == '-');
/* For CPP_OTHER the first byte of the token text decides: a backslash
   is one case, and an @ followed by a name or string is another that
   applies only when the objc option is set.  */
1846 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1848 || (CPP_OPTION (pfile, objc)
1849 && token1->val.str.text[0] == '@'
1850 && (b == CPP_NAME || b == CPP_STRING)));
1857 /* Output all the remaining tokens on the current line, and a newline
1858 character, to FP. Leading whitespace is removed. If there are
1859 macros, special token padding is not performed. */
/* Tokens are fetched with cpp_get_token until one of type CPP_EOF is
   seen, and each is written with cpp_output_token.
   NOTE(review): the statements that emit the separating space and the
   final newline are not shown in this listing.  */
1861 cpp_output_line (cpp_reader *pfile, FILE *fp)
1863 const cpp_token *token;
/* The PREV_WHITE flag of this first token is never examined, which is
   how whitespace before the first token is dropped.  */
1865 token = cpp_get_token (pfile);
1866 while (token->type != CPP_EOF)
1868 cpp_output_token (token, fp);
/* Fetch the next token before deciding on spacing: the flag tested
   below belongs to the token that follows the one just written.  */
1869 token = cpp_get_token (pfile);
1870 if (token->flags & PREV_WHITE)
1877 /* Memory buffers. Changing these three constants can have a dramatic
1878 effect on performance. The values here are reasonable defaults,
1879 but might be tuned. If you adjust them, be sure to test across a
1880 range of uses of cpplib, including heavy nested function-like macro
1881 expansion. Also check the change in peak memory usage (NJAMD is a
1882 good tool for this). */
/* Smallest data size new_buff will allocate; smaller requests are
   raised to this value before alignment.  */
1883 #define MIN_BUFF_SIZE 8000
/* Largest capacity of a free-list buffer that _cpp_get_buff will hand
   out for a request of MIN_SIZE bytes.  */
1884 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when a buffer is extended: MIN_EXTRA plus twice the
   distance from cur to limit of BUFF.
   NOTE(review): MIN_EXTRA is not parenthesized in the expansion; the
   two uses visible in this file pass a plain identifier.  */
1885 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1886 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check on the two size macros.  NOTE(review):
   the directive closing this conditional is not shown in this
   listing.  */
1888 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1889 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1892 /* Create a new allocation buffer. Place the control block at the end
1893 of the buffer, so that buffer overflows will cause immediate chaos. */
/* LEN is the requested data size in bytes.  The data area and the
   _cpp_buff control block share a single XNEWVEC allocation, with the
   control block placed directly after the data.
   NOTE(review): the declaration of RESULT, the initialisation of its
   cur field and the return are not shown in this listing.  */
1895 new_buff (size_t len)
1898 unsigned char *base;
/* Never allocate less than MIN_BUFF_SIZE, then round the size with
   CPP_ALIGN (which also fixes where the control block starts).  */
1900 if (len < MIN_BUFF_SIZE)
1901 len = MIN_BUFF_SIZE;
1902 len = CPP_ALIGN (len);
/* One allocation: LEN data bytes followed by the control block.  */
1904 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1905 result = (_cpp_buff *) (base + len);
1906 result->base = base;
/* The limit of the data area is also the address of the control
   block itself.  */
1908 result->limit = base + len;
1909 result->next = NULL;
1913 /* Place a chain of unwanted allocation buffers on the free list. */
/* BUFF is the head of the chain being released.  The current free
   list of PFILE is attached behind END, and BUFF becomes the new head
   of the free list; nothing is deallocated here.
   NOTE(review): presumably END is first advanced to the last buffer
   of the chain; that loop is not shown in this listing.  */
1915 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1917 _cpp_buff *end = buff;
1921 end->next = pfile->free_buffs;
1922 pfile->free_buffs = buff;
1925 /* Return a free buffer of size at least MIN_SIZE. */
/* The free list of PFILE is walked with P addressing the link that
   points at the current candidate.  A candidate is accepted only if
   its capacity (limit - base) is at least MIN_SIZE and at most
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE); new_buff (MIN_SIZE) is the
   fallback.  A buffer taken from the list has its next field cleared
   and its cur field reset to base before use.
   NOTE(review): this listing omits several short lines (the
   end-of-list test, the assignment of RESULT, the unlinking, the
   return) as well as the closing line of the comment inside the
   loop.  */
1927 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1929 _cpp_buff *result, **p;
1931 for (p = &pfile->free_buffs;; p = &(*p)->next)
1936 return new_buff (min_size);
1938 size = result->limit - result->base;
1939 /* Return a buffer that's big enough, but don't waste one that's
1941 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1946 result->next = NULL;
1947 result->cur = result->base;
1951 /* Creates a new buffer with enough space to hold the uncommitted
1952 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1953 the excess bytes to the new buffer. Chains the new buffer after
1954 BUFF, and returns the new buffer. */
/* The requested size comes from EXTENDED_BUFF_SIZE, and the new
   buffer is obtained through _cpp_get_buff.  BUFF_ROOM (BUFF) bytes
   starting at the cur pointer of BUFF are copied to the base of the
   new buffer.  Within this body the local variable new_buff hides
   the file-level function of the same name.
   NOTE(review): the return statement is not shown in this listing.  */
1956 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1958 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1959 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link first, then copy; the copy does not move the cur pointer of
   either buffer.  */
1961 buff->next = new_buff;
1962 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1966 /* Creates a new buffer with enough space to hold the uncommitted
1967 remaining bytes of the buffer pointed to by BUFF, and at least
1968 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1969 Chains the new buffer before the buffer pointed to by BUFF, and
1970 updates the pointer to point to the new buffer. */
/* The parameter is spelt PBUFF in the signature below.  The requested
   size comes from EXTENDED_BUFF_SIZE and the new buffer is obtained
   through _cpp_get_buff.  Within this body the local variable
   new_buff hides the file-level function of the same name.
   NOTE(review): the statement that stores the new buffer through
   PBUFF is not shown in this listing.  */
1972 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1974 _cpp_buff *new_buff, *old_buff = *pbuff;
1975 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1977 new_buff = _cpp_get_buff (pfile, size);
/* Copy BUFF_ROOM (old_buff) bytes from the cur pointer of the old
   buffer to the base of the new one, then put the old buffer behind
   the new one.  */
1978 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1979 new_buff->next = old_buff;
1983 /* Free a chain of buffers starting at BUFF. */
/* A null BUFF is accepted: the loop below then does not run.
   NOTE(review): the declaration of NEXT and the loop body are not
   shown in this listing; NEXT has to be set inside the body for the
   step expression to work.  */
1985 _cpp_free_buff (_cpp_buff *buff)
1989 for (; buff; buff = next)
1996 /* Allocate permanent, unaligned storage of length LEN. */
/* LEN bytes are carved from the buffer at the head of the u_buff
   chain of PFILE: the result is the current cur pointer of that
   buffer, and cur is then advanced by LEN.  No rounding of LEN or of
   the result pointer is visible.
   NOTE(review): the statement that re-points RESULT into a newly
   obtained buffer and the return are not shown in this listing.  */
1998 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2000 _cpp_buff *buff = pfile->u_buff;
2001 unsigned char *result = buff->cur;
/* Not enough room left in the head buffer: get another buffer of at
   least LEN bytes and push it on the front of the chain.  The bytes
   left in the old head are not used by this call.  */
2003 if (len > (size_t) (buff->limit - result))
2005 buff = _cpp_get_buff (pfile, len);
2006 buff->next = pfile->u_buff;
2007 pfile->u_buff = buff;
2011 buff->cur = result + len;
2015 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2016 That buffer is used for growing allocations when saving macro
2017 replacement lists in a #define, and when parsing an answer to an
2018 assertion in #assert, #unassert or #if (and therefore possibly
2019 whilst expanding macros). It therefore must not be used by any
2020 code that they might call: specifically the lexer and the guts of
2023 All existing other uses clearly fit this restriction: storing
2024 registered pragmas during initialization. */
/* The visible statements match _cpp_unaligned_alloc except that they
   work on the a_buff chain of PFILE.
   NOTE(review): the function name says aligned while the comment
   above says unaligned, and no rounding of LEN or of the result
   pointer is visible here; confirm which is intended.  One line of
   the comment above, the statement that re-points RESULT into a newly
   obtained buffer and the return are not shown in this listing.  */
2026 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2028 _cpp_buff *buff = pfile->a_buff;
2029 unsigned char *result = buff->cur;
/* Not enough room left in the head buffer: get another buffer of at
   least LEN bytes and push it on the front of the chain.  */
2031 if (len > (size_t) (buff->limit - result))
2033 buff = _cpp_get_buff (pfile, len);
2034 buff->next = pfile->a_buff;
2035 pfile->a_buff = buff;
2039 buff->cur = result + len;
2043 /* Say which field of TOK is in use. */
/* The answer is a cpp_token_fld_kind value chosen from the spelling
   category of TOK and, in one arm, from its type.
   NOTE(review): the case labels are not shown in this listing, and
   the end of the function lies beyond the visible text, so the arms
   are described only by what they return.  */
2045 enum cpp_token_fld_kind
2046 cpp_token_val_index (cpp_token *tok)
2048 switch (TOKEN_SPELL (tok))
2051 return CPP_TOKEN_FLD_NODE;
2053 return CPP_TOKEN_FLD_STR;
/* In this arm the token type selects the answer: CPP_MACRO_ARG gives
   ARG_NO, CPP_PADDING gives SOURCE and CPP_PRAGMA gives STR; any
   other type continues to the statement after the if chain.  */
2055 if (tok->type == CPP_MACRO_ARG)
2056 return CPP_TOKEN_FLD_ARG_NO;
2057 else if (tok->type == CPP_PADDING)
2058 return CPP_TOKEN_FLD_SOURCE;
2059 else if (tok->type == CPP_PRAGMA)
2060 return CPP_TOKEN_FLD_STR;
2061 /* else fall through */
2063 return CPP_TOKEN_FLD_NONE;