1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
38 enum spell_type category;
39 const unsigned char *name;
42 static const unsigned char *const digraph_spellings[] =
43 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, U s },
46 #define TK(e, s) { SPELL_ ## s, U #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
101 _cpp_clean_line (cpp_reader *pfile)
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 /* Short circuit for the common case of an un-escaped line with
116 no trigraphs. The primary win here is by not writing any
117 data back to memory until we have to. */
121 if (c == '\n' || c == '\r')
125 if (s == buffer->rlimit)
128 /* DOS line ending? */
129 if (c == '\r' && s[1] == '\n')
132 if (s == buffer->rlimit)
135 /* check for escaped newline */
137 while (p != buffer->next_line && is_nvspace (p[-1]))
139 if (p == buffer->next_line || p[-1] != '\\')
142 /* Have an escaped newline; process it and proceed to
144 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
146 buffer->next_line = p - 1;
149 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
151 /* Have a trigraph. We may or may not have to convert
152 it. Add a line note regardless, for -Wtrigraphs. */
153 add_line_note (buffer, s, s[2]);
154 if (CPP_OPTION (pfile, trigraphs))
156 /* We do, and that means we have to switch to the
159 *d = _cpp_trigraph_map[s[2]];
172 if (c == '\n' || c == '\r')
174 /* Handle DOS line endings. */
175 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
177 if (s == buffer->rlimit)
182 while (p != buffer->next_line && is_nvspace (p[-1]))
184 if (p == buffer->next_line || p[-1] != '\\')
187 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
189 buffer->next_line = p - 1;
191 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
193 /* Add a note regardless, for the benefit of -Wtrigraphs. */
194 add_line_note (buffer, d, s[2]);
195 if (CPP_OPTION (pfile, trigraphs))
197 *d = _cpp_trigraph_map[s[2]];
207 while (*s != '\n' && *s != '\r');
210 /* Handle DOS line endings. */
211 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
217 /* A sentinel note that should never be processed. */
218 add_line_note (buffer, d + 1, '\n');
219 buffer->next_line = s + 1;
222 /* Return true if the trigraph indicated by NOTE should be warned
223 about in a comment. */
225 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 /* Within comments we don't warn about trigraphs, unless the
230 trigraph forms an escaped newline, as that may change
232 if (note->type != '/')
235 /* If -trigraphs, then this was an escaped newline iff the next note
237 if (CPP_OPTION (pfile, trigraphs))
238 return note[1].pos == note->pos;
240 /* Otherwise, see if this forms an escaped newline. */
242 while (is_nvspace (*p))
245 /* There might have been escaped newlines between the trigraph and the
246 newline we found. Hence the position test. */
247 return (*p == '\n' && p < note[1].pos);
250 /* Process the notes created by add_line_note as far as the current
253 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
255 cpp_buffer *buffer = pfile->buffer;
259 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 if (note->pos > buffer->cur)
266 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
268 if (note->type == '\\' || note->type == ' ')
270 if (note->type == ' ' && !in_comment)
271 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
272 "backslash and newline separated by space");
274 if (buffer->next_line > buffer->rlimit)
276 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
277 "backslash-newline at end of file");
278 /* Prevent "no newline at end of file" warning. */
279 buffer->next_line = buffer->rlimit;
282 buffer->line_base = note->pos;
283 CPP_INCREMENT_LINE (pfile, 0);
285 else if (_cpp_trigraph_map[note->type])
287 if (CPP_OPTION (pfile, warn_trigraphs)
288 && (!in_comment || warn_in_comment (pfile, note)))
290 if (CPP_OPTION (pfile, trigraphs))
291 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
292 "trigraph ??%c converted to %c",
294 (int) _cpp_trigraph_map[note->type]);
298 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
299 "trigraph ??%c ignored, use -trigraphs to enable",
309 /* SDCC _asm specific */
310 /* Skip an _asm ... _endasm block. We find the end of the block by
311 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
314 skip_asm_block (cpp_reader *pfile)
316 #define _ENDASM_STR "endasm"
317 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
319 cpp_buffer *buffer = pfile->buffer;
324 while (buffer->cur != buffer->rlimit)
326 prev_space = is_space(c);
329 if (prev_space && c == '_')
331 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
332 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
334 buffer->cur += _ENDASM_LEN;
343 _cpp_process_line_notes (pfile, true);
344 if (buffer->next_line >= buffer->rlimit)
346 _cpp_clean_line (pfile);
348 cols = buffer->next_line - buffer->line_base;
349 CPP_INCREMENT_LINE (pfile, cols);
353 _cpp_process_line_notes (pfile, true);
357 /* Skip a C-style block comment. We find the end of the comment by
358 seeing if an asterisk is before every '/' we encounter. Returns
359 nonzero if comment terminated by EOF, zero otherwise.
361 Buffer->cur points to the initial asterisk of the comment. */
363 _cpp_skip_block_comment (cpp_reader *pfile)
365 cpp_buffer *buffer = pfile->buffer;
366 const uchar *cur = buffer->cur;
375 /* People like decorating comments with '*', so check for '/'
376 instead for efficiency. */
384 /* Warn about potential nested comments, but not if the '/'
385 comes immediately before the true comment delimiter.
386 Don't bother to get it right across escaped newlines. */
387 if (CPP_OPTION (pfile, warn_comments)
388 && cur[0] == '*' && cur[1] != '/')
391 cpp_error_with_line (pfile, CPP_DL_WARNING,
392 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
393 "\"/*\" within comment");
399 buffer->cur = cur - 1;
400 _cpp_process_line_notes (pfile, true);
401 if (buffer->next_line >= buffer->rlimit)
403 _cpp_clean_line (pfile);
405 cols = buffer->next_line - buffer->line_base;
406 CPP_INCREMENT_LINE (pfile, cols);
413 _cpp_process_line_notes (pfile, true);
417 /* Skip a C++ line comment, leaving buffer->cur pointing to the
418 terminating newline. Handles escaped newlines. Returns nonzero
419 if a multiline comment. */
421 skip_line_comment (cpp_reader *pfile)
423 cpp_buffer *buffer = pfile->buffer;
424 unsigned int orig_line = pfile->line_table->highest_line;
426 while (*buffer->cur != '\n')
429 _cpp_process_line_notes (pfile, true);
430 return orig_line != pfile->line_table->highest_line;
433 /* Skips whitespace, saving the next non-whitespace character. */
435 skip_whitespace (cpp_reader *pfile, cppchar_t c)
437 cpp_buffer *buffer = pfile->buffer;
438 bool saw_NUL = false;
442 /* Horizontal space always OK. */
443 if (c == ' ' || c == '\t')
445 /* Just \f \v or \0 left. */
448 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
449 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
450 CPP_BUF_COL (buffer),
451 "%s in preprocessing directive",
452 c == '\f' ? "form feed" : "vertical tab");
456 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
457 while (is_nvspace (c));
460 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
465 /* See if the characters of a number token are valid in a name (no
468 name_p (cpp_reader *pfile, const cpp_string *string)
472 for (i = 0; i < string->len; i++)
473 if (!is_idchar (string->text[i]))
479 /* After parsing an identifier or other sequence, produce a warning about
480 sequences not in NFC/NFKC. */
482 warn_about_normalization (cpp_reader *pfile,
483 const cpp_token *token,
484 const struct normalize_state *s)
486 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
487 && !pfile->state.skipping)
489 /* Make sure that the token is printed using UCNs, even
490 if we'd otherwise happily print UTF-8. */
491 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
494 sz = cpp_spell_token (pfile, token, buf, false) - buf;
495 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
496 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
497 "`%.*s' is not in NFKC", (int) sz, buf);
499 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
500 "`%.*s' is not in NFC", (int) sz, buf);
504 /* Returns TRUE if the sequence starting at buffer->cur is valid in
505 an identifier. FIRST is TRUE if this starts an identifier. */
507 forms_identifier_p (cpp_reader *pfile, int first,
508 struct normalize_state *state)
510 cpp_buffer *buffer = pfile->buffer;
512 if (*buffer->cur == '$')
514 if (!CPP_OPTION (pfile, dollars_in_ident))
518 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
520 CPP_OPTION (pfile, warn_dollars) = 0;
521 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
527 /* Is this a syntactically valid UCN? */
528 if (CPP_OPTION (pfile, extended_identifiers)
529 && *buffer->cur == '\\'
530 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
533 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
542 /* Lex an identifier starting at BASE (BUFFER->CUR - 1 unless it starts with a UCN). */
543 static cpp_hashnode *
544 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
545 struct normalize_state *nst)
547 cpp_hashnode *result;
550 unsigned int hash = HT_HASHSTEP (0, *base);
552 cur = pfile->buffer->cur;
554 while (ISIDNUM (*cur))
556 hash = HT_HASHSTEP (hash, *cur);
559 pfile->buffer->cur = cur;
560 if (starts_ucn || forms_identifier_p (pfile, false, nst))
562 /* Slower version for identifiers containing UCNs (or $). */
564 while (ISIDNUM (*pfile->buffer->cur))
566 pfile->buffer->cur++;
567 NORMALIZE_STATE_UPDATE_IDNUM (nst);
569 } while (forms_identifier_p (pfile, false, nst));
570 result = _cpp_interpret_identifier (pfile, base,
571 pfile->buffer->cur - base);
576 hash = HT_HASHFINISH (hash, len);
578 result = (cpp_hashnode *)
579 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
582 /* Rarely, identifiers require diagnostics when lexed. */
583 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
584 && !pfile->state.skipping, 0))
586 /* It is allowed to poison the same identifier twice. */
587 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
588 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
591 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
592 replacement list of a variadic macro. */
593 if (result == pfile->spec_nodes.n__VA_ARGS__
594 && !pfile->state.va_args_ok)
595 cpp_error (pfile, CPP_DL_PEDWARN,
596 "__VA_ARGS__ can only appear in the expansion"
597 " of a C99 variadic macro");
604 /* Pedantically parse a number starting at BUFFER->CUR - 1, which is
605 either a digit or a "." that is followed by a digit. Place the
606 result in NUMBER. */
608 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
610 #define get_effective_char(pfile) (*pfile->buffer->cur++)
611 #define BACKUP() (--pfile->buffer->cur)
613 enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
614 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
616 uchar c = *(pfile->buffer->cur - 1);
617 struct obstack *stack = &pfile->hash_table->stack;
618 cpp_buffer *buffer = pfile->buffer;
627 obstack_1grow (stack, '.');
628 c = get_effective_char(pfile);
636 obstack_1grow (stack, c);
637 c = get_effective_char(pfile);
645 obstack_1grow (stack, c);
646 c = get_effective_char(pfile);
652 obstack_1grow (stack, c);
653 c = get_effective_char(pfile);
664 if (NT_DEC == num_type)
670 obstack_1grow (stack, c);
671 c = get_effective_char(pfile);
678 obstack_1grow (stack, c);
679 c = get_effective_char(pfile);
682 else if ('E' == c || 'e' == c)
684 if (has_whole || has_fract)
688 obstack_1grow (stack, c);
689 c = get_effective_char(pfile);
702 obstack_1grow (stack, c);
703 c = get_effective_char(pfile);
710 obstack_1grow (stack, c);
711 c = get_effective_char(pfile);
714 else if ('P' == c || 'p' == c)
716 if (has_whole || has_fract)
720 obstack_1grow (stack, c);
721 c = get_effective_char(pfile);
728 num_part = NP_INT_SUFFIX;
732 if (NT_DEC == num_type)
738 obstack_1grow (stack, c);
739 c = get_effective_char(pfile);
742 if ('E' == c || 'e' == c)
744 if (has_whole || has_fract)
748 obstack_1grow (stack, c);
749 c = get_effective_char(pfile);
760 obstack_1grow (stack, c);
761 c = get_effective_char(pfile);
764 if ('P' == c || 'p' == c)
766 if (has_whole || has_fract)
770 obstack_1grow (stack, c);
771 c = get_effective_char(pfile);
776 num_part = NP_FLOAT_SUFFIX;
780 if ('+' == c || '-' == c)
783 obstack_1grow (stack, c);
784 c = get_effective_char(pfile);
790 obstack_1grow (stack, c);
791 c = get_effective_char(pfile);
794 num_part = NP_FLOAT_SUFFIX;
798 if ('L' == c || 'l' == c)
803 obstack_1grow (stack, c);
804 c = get_effective_char(pfile);
809 obstack_1grow (stack, c);
810 c = get_effective_char(pfile);
813 else if ('U' == c || 'u' == c)
816 obstack_1grow (stack, c);
817 c = get_effective_char(pfile);
821 case NP_FLOAT_SUFFIX:
822 if ('F' == c || 'f' == c)
825 obstack_1grow (stack, c);
826 c = get_effective_char(pfile);
828 else if ('L' == c || 'l' == c)
831 obstack_1grow (stack, c);
832 c = get_effective_char(pfile);
839 /* Step back over the unwanted char. */
842 number->text = obstack_finish (stack);
846 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
848 lex_number (cpp_reader *pfile, cpp_string *number,
849 struct normalize_state *nst)
855 base = pfile->buffer->cur - 1;
858 cur = pfile->buffer->cur;
860 /* N.B. ISIDNUM does not include $. */
861 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
864 NORMALIZE_STATE_UPDATE_IDNUM (nst);
867 pfile->buffer->cur = cur;
869 while (forms_identifier_p (pfile, false, nst));
871 number->len = cur - base;
872 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
873 memcpy (dest, base, number->len);
874 dest[number->len] = '\0';
878 /* Create a token of type TYPE with a literal spelling. */
880 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
881 unsigned int len, enum cpp_ttype type)
883 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
885 memcpy (dest, base, len);
888 token->val.str.len = len;
889 token->val.str.text = dest;
892 /* Lexes a string, character constant, or angle-bracketed header file
893 name. The stored string contains the spelling, including opening
894 quote and any leading 'L'. TOKEN's type is set to the type of the
895 literal, or CPP_OTHER if it was not properly terminated.
897 The spelling is NUL-terminated, but it is not guaranteed that this
898 is the first NUL since embedded NULs are preserved. */
900 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
902 bool saw_NUL = false;
904 cppchar_t terminator;
909 if (terminator == 'L')
911 if (terminator == '\"')
912 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
913 else if (terminator == '\'')
914 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
916 terminator = '>', type = CPP_HEADER_NAME;
920 cppchar_t c = *cur++;
922 /* In #include-style directives, terminators are not escapable. */
923 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
925 else if (c == terminator)
937 if (saw_NUL && !pfile->state.skipping)
938 cpp_error (pfile, CPP_DL_WARNING,
939 "null character(s) preserved in literal");
941 pfile->buffer->cur = cur;
942 create_literal (pfile, token, base, cur - base, type);
945 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
946 comment blocks (when executed with -C option) and
947 _asm (SDCPP specific) blocks */
949 /* Count and copy characters from src to dest, excluding CRs:
950 CRs are automatically generated, because the output is
951 opened in TEXT mode. If dest == NULL, only count chars */
953 copy_text_chars (char *dest, const char *src, unsigned int len)
958 for (p = src; p != src + len; ++p)
973 /* SDCC _asm specific */
974 /* The stored block includes the leading "_asm" and any terminator. */
976 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
978 #define _ASM_STR "_asm"
979 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
981 unsigned char *buffer;
982 unsigned int text_len, len;
984 len = pfile->buffer->cur - from;
985 /* + _ASM_LEN for the initial '_asm'. */
986 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
987 buffer = _cpp_unaligned_alloc (pfile, text_len);
990 token->type = CPP_ASM;
991 token->val.str.len = text_len;
992 token->val.str.text = buffer;
994 memcpy (buffer, _ASM_STR, _ASM_LEN);
995 copy_text_chars (buffer + _ASM_LEN, from, len);
998 /* The stored comment includes the comment start and any terminator. */
1000 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1003 unsigned char *buffer;
1004 unsigned int len, clen;
1006 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1008 /* C++ comments probably (not definitely) have moved past a new
1009 line, which we don't want to save in the comment. */
1010 if (is_vspace (pfile->buffer->cur[-1]))
1013 /* If we are currently in a directive, then we need to store all
1014 C++ comments as C comments internally, and so we need to
1015 allocate a little extra space in that case.
1017 Note that the only time we encounter a directive here is
1018 when we are saving comments in a "#define". */
1019 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1021 buffer = _cpp_unaligned_alloc (pfile, clen);
1023 token->type = CPP_COMMENT;
1024 token->val.str.len = clen;
1025 token->val.str.text = buffer;
1028 copy_text_chars (buffer + 1, from, len);
1030 /* Finish conversion to a C comment, if necessary. */
1031 if (pfile->state.in_directive && type == '/')
1034 buffer[clen - 2] = '*';
1035 buffer[clen - 1] = '/';
1039 /* Allocate COUNT tokens for RUN. */
1041 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1043 run->base = XNEWVEC (cpp_token, count);
1044 run->limit = run->base + count;
1048 /* Returns the next tokenrun, or creates one if there is none. */
1050 next_tokenrun (tokenrun *run)
1052 if (run->next == NULL)
1054 run->next = XNEW (tokenrun);
1055 run->next->prev = run;
1056 _cpp_init_tokenrun (run->next, 250);
1062 /* Allocate a single token that is invalidated at the same time as the
1063 rest of the tokens on the line. Has its line and col set to the
1064 same as the last lexed token, so that diagnostics appear in the
1067 _cpp_temp_token (cpp_reader *pfile)
1069 cpp_token *old, *result;
1071 old = pfile->cur_token - 1;
1072 if (pfile->cur_token == pfile->cur_run->limit)
1074 pfile->cur_run = next_tokenrun (pfile->cur_run);
1075 pfile->cur_token = pfile->cur_run->base;
1078 result = pfile->cur_token++;
1079 result->src_loc = old->src_loc;
1083 /* Lex a token into RESULT (external interface). Takes care of issues
1084 like directive handling, token lookahead, multiple include
1085 optimization and skipping. */
1087 _cpp_lex_token (cpp_reader *pfile)
1093 if (pfile->cur_token == pfile->cur_run->limit)
1095 pfile->cur_run = next_tokenrun (pfile->cur_run);
1096 pfile->cur_token = pfile->cur_run->base;
1099 if (pfile->lookaheads)
1101 pfile->lookaheads--;
1102 result = pfile->cur_token++;
1105 result = _cpp_lex_direct (pfile);
1107 if (result->flags & BOL)
1109 /* Is this a directive? If _cpp_handle_directive returns
1110 false, it is an assembler #. */
1111 if (result->type == CPP_HASH
1112 /* 6.10.3 p 11: Directives in a list of macro arguments
1113 gives undefined behavior. This implementation
1114 handles the directive as normal. */
1115 && pfile->state.parsing_args != 1
1116 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1118 if (pfile->directive_result.type == CPP_PADDING)
1122 result = &pfile->directive_result;
1127 if (pfile->cb.line_change && !pfile->state.skipping)
1128 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1131 /* We don't skip tokens in directives. */
1132 if (pfile->state.in_directive)
1135 /* Outside a directive, invalidate controlling macros. At file
1136 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1137 get here and MI optimization works. */
1138 pfile->mi_valid = false;
1140 if (!pfile->state.skipping || result->type == CPP_EOF)
1147 /* Returns true if a fresh line has been loaded. */
1149 _cpp_get_fresh_line (cpp_reader *pfile)
1153 /* We can't get a new line until we leave the current directive. */
1154 if (pfile->state.in_directive)
1159 cpp_buffer *buffer = pfile->buffer;
1161 if (!buffer->need_line)
1164 if (buffer->next_line < buffer->rlimit)
1166 _cpp_clean_line (pfile);
1170 /* First, get out of parsing arguments state. */
1171 if (pfile->state.parsing_args)
1174 /* End of buffer. Non-empty files should end in a newline. */
1175 if (buffer->buf != buffer->rlimit
1176 && buffer->next_line > buffer->rlimit
1177 && !buffer->from_stage3)
1179 /* Only warn once. */
1180 buffer->next_line = buffer->rlimit;
1181 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1182 CPP_BUF_COLUMN (buffer, buffer->cur),
1183 "no newline at end of file");
1186 return_at_eof = buffer->return_at_eof;
1187 _cpp_pop_buffer (pfile);
1188 if (pfile->buffer == NULL || return_at_eof)
1193 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1196 result->type = ELSE_TYPE; \
1197 if (*buffer->cur == CHAR) \
1198 buffer->cur++, result->type = THEN_TYPE; \
1202 /* Lex a token into pfile->cur_token, which is also incremented, to
1203 get diagnostics pointing to the correct location.
1205 Does not handle issues such as token lookahead, multiple-include
1206 optimization, directives, skipping etc. This function is only
1207 suitable for use by _cpp_lex_token, and in special cases like
1208 lex_expansion_token which doesn't care for any of these issues.
1210 When meeting a newline, returns CPP_EOF if parsing a directive,
1211 otherwise returns to the start of the token buffer if permissible.
1212 Returns the location of the lexed token. */
1214 _cpp_lex_direct (cpp_reader *pfile)
1218 const unsigned char *comment_start;
1219 cpp_token *result = pfile->cur_token++;
1223 buffer = pfile->buffer;
1224 if (buffer->need_line)
1226 if (!_cpp_get_fresh_line (pfile))
1228 result->type = CPP_EOF;
1229 if (!pfile->state.in_directive)
1231 /* Tell the compiler the line number of the EOF token. */
1232 result->src_loc = pfile->line_table->highest_line;
1233 result->flags = BOL;
1237 if (!pfile->keep_tokens)
1239 pfile->cur_run = &pfile->base_run;
1240 result = pfile->base_run.base;
1241 pfile->cur_token = result + 1;
1243 result->flags = BOL;
1244 if (pfile->state.parsing_args == 2)
1245 result->flags |= PREV_WHITE;
1247 buffer = pfile->buffer;
1249 result->src_loc = pfile->line_table->highest_line;
1252 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1253 && !pfile->overlaid_buffer)
1255 _cpp_process_line_notes (pfile, false);
1256 result->src_loc = pfile->line_table->highest_line;
1260 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1261 CPP_BUF_COLUMN (buffer, buffer->cur));
1265 case ' ': case '\t': case '\f': case '\v': case '\0':
1266 result->flags |= PREV_WHITE;
1267 skip_whitespace (pfile, c);
1271 if (buffer->cur < buffer->rlimit)
1272 CPP_INCREMENT_LINE (pfile, 0);
1273 buffer->need_line = true;
1276 case '0': case '1': case '2': case '3': case '4':
1277 case '5': case '6': case '7': case '8': case '9':
1279 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1280 result->type = CPP_NUMBER;
1281 if (CPP_OPTION(pfile, pedantic_parse_number))
1282 pedantic_lex_number (pfile, &result->val.str);
1284 lex_number (pfile, &result->val.str, &nst);
1285 warn_about_normalization (pfile, result, &nst);
1290 /* 'L' may introduce wide characters or strings. */
1291 if (*buffer->cur == '\'' || *buffer->cur == '"')
1293 lex_string (pfile, result, buffer->cur - 1);
1299 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1300 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1301 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1302 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1304 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1305 case 'G': case 'H': case 'I': case 'J': case 'K':
1306 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1307 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1309 result->type = CPP_NAME;
1311 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1312 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1314 warn_about_normalization (pfile, result, &nst);
1317 /* SDCC _asm specific */
1318 /* handle _asm ... _endasm ; */
1319 if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1321 comment_start = buffer->cur;
1322 result->type = CPP_ASM;
1323 skip_asm_block (pfile);
1324 /* Save the _asm block as a token in its own right. */
1325 save_asm (pfile, result, comment_start);
1327 /* Convert named operators to their proper types. */
1328 else if (result->val.node->flags & NODE_OPERATOR)
1330 result->flags |= NAMED_OP;
1331 result->type = (enum cpp_ttype) result->val.node->directive_index;
1337 lex_string (pfile, result, buffer->cur - 1);
1341 /* A potential block or line comment. */
1342 comment_start = buffer->cur;
1347 if (_cpp_skip_block_comment (pfile))
1348 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1350 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1351 || cpp_in_system_header (pfile)))
1353 /* Warn about comments only if pedantically GNUC89, and not
1354 in system headers. */
1355 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1356 && ! buffer->warned_cplusplus_comments)
1358 cpp_error (pfile, CPP_DL_PEDWARN,
1359 "C++ style comments are not allowed in ISO C90");
1360 cpp_error (pfile, CPP_DL_PEDWARN,
1361 "(this will be reported only once per input file)");
1362 buffer->warned_cplusplus_comments = 1;
1365 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1366 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1371 result->type = CPP_DIV_EQ;
1376 result->type = CPP_DIV;
1380 if (!pfile->state.save_comments)
1382 result->flags |= PREV_WHITE;
1383 goto update_tokens_line;
1386 /* Save the comment as a token in its own right. */
1387 save_comment (pfile, result, comment_start, c);
1391 if (pfile->state.angled_headers)
1393 lex_string (pfile, result, buffer->cur - 1);
1397 result->type = CPP_LESS;
1398 if (*buffer->cur == '=')
1399 buffer->cur++, result->type = CPP_LESS_EQ;
1400 else if (*buffer->cur == '<')
1403 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1405 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1408 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1410 else if (CPP_OPTION (pfile, digraphs))
1412 if (*buffer->cur == ':')
1415 result->flags |= DIGRAPH;
1416 result->type = CPP_OPEN_SQUARE;
1418 else if (*buffer->cur == '%')
1421 result->flags |= DIGRAPH;
1422 result->type = CPP_OPEN_BRACE;
1428 result->type = CPP_GREATER;
1429 if (*buffer->cur == '=')
1430 buffer->cur++, result->type = CPP_GREATER_EQ;
1431 else if (*buffer->cur == '>')
1434 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1436 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1439 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1444 result->type = CPP_MOD;
1445 if (*buffer->cur == '=')
1446 buffer->cur++, result->type = CPP_MOD_EQ;
1447 else if (CPP_OPTION (pfile, digraphs))
1449 if (*buffer->cur == ':')
1452 result->flags |= DIGRAPH;
1453 result->type = CPP_HASH;
1454 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1455 buffer->cur += 2, result->type = CPP_PASTE;
1457 else if (*buffer->cur == '>')
1460 result->flags |= DIGRAPH;
1461 result->type = CPP_CLOSE_BRACE;
1467 result->type = CPP_DOT;
1468 if (ISDIGIT (*buffer->cur))
1470 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1471 result->type = CPP_NUMBER;
1472 if (CPP_OPTION(pfile, pedantic_parse_number))
1473 pedantic_lex_number (pfile, &result->val.str);
1475 lex_number (pfile, &result->val.str, &nst);
1476 warn_about_normalization (pfile, result, &nst);
1478 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1479 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1480 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1481 buffer->cur++, result->type = CPP_DOT_STAR;
1485 result->type = CPP_PLUS;
1486 if (*buffer->cur == '+')
1487 buffer->cur++, result->type = CPP_PLUS_PLUS;
1488 else if (*buffer->cur == '=')
1489 buffer->cur++, result->type = CPP_PLUS_EQ;
1493 result->type = CPP_MINUS;
1494 if (*buffer->cur == '>')
1497 result->type = CPP_DEREF;
1498 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1499 buffer->cur++, result->type = CPP_DEREF_STAR;
1501 else if (*buffer->cur == '-')
1502 buffer->cur++, result->type = CPP_MINUS_MINUS;
1503 else if (*buffer->cur == '=')
1504 buffer->cur++, result->type = CPP_MINUS_EQ;
1508 result->type = CPP_AND;
1509 if (*buffer->cur == '&')
1510 buffer->cur++, result->type = CPP_AND_AND;
1511 else if (*buffer->cur == '=')
1512 buffer->cur++, result->type = CPP_AND_EQ;
1516 result->type = CPP_OR;
1517 if (*buffer->cur == '|')
1518 buffer->cur++, result->type = CPP_OR_OR;
1519 else if (*buffer->cur == '=')
1520 buffer->cur++, result->type = CPP_OR_EQ;
1524 result->type = CPP_COLON;
1525 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1526 buffer->cur++, result->type = CPP_SCOPE;
1527 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1530 result->flags |= DIGRAPH;
1531 result->type = CPP_CLOSE_SQUARE;
1535 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1536 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1537 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1538 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1539 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1541 case '?': result->type = CPP_QUERY; break;
1542 case '~': result->type = CPP_COMPL; break;
1543 case ',': result->type = CPP_COMMA; break;
1544 case '(': result->type = CPP_OPEN_PAREN; break;
1545 case ')': result->type = CPP_CLOSE_PAREN; break;
1546 case '[': result->type = CPP_OPEN_SQUARE; break;
1547 case ']': result->type = CPP_CLOSE_SQUARE; break;
1548 case '{': result->type = CPP_OPEN_BRACE; break;
1549 case '}': result->type = CPP_CLOSE_BRACE; break;
1550 case ';': result->type = CPP_SEMICOLON; break;
1552 /* @ is a punctuator in Objective-C. */
1553 case '@': result->type = CPP_ATSIGN; break;
1558 const uchar *base = --buffer->cur;
1559 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1561 if (forms_identifier_p (pfile, true, &nst))
1563 result->type = CPP_NAME;
1564 result->val.node = lex_identifier (pfile, base, true, &nst);
1565 warn_about_normalization (pfile, result, &nst);
1572 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1579 /* An upper bound on the number of bytes needed to spell TOKEN.
1580 Does not include preceding whitespace.  The bound depends only on
   the token's spelling category (TOKEN_SPELL): literals need their
   stored length, identifiers ten bytes per byte of the node name
   (utf8_to_ucn writes ten bytes for one extended character), and
   every other category a fixed six bytes.  */
1582 cpp_token_len (const cpp_token *token)
1586 switch (TOKEN_SPELL (token))
/* Six rather than four: tokens flagged NAMED_OP share the operator
   category, and alternative spellings such as "and_eq" or "bitand"
   are six characters long, longer than any digraph ("%:%:").  */
1588 default: len = 6; break;
1589 case SPELL_LITERAL: len = token->val.str.len; break;
1590 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1596 /* Parse one UTF-8 sequence out of NAME and place a \U escape in BUFFER.
1597 Return the number of bytes read out of NAME. (There are always
1598 10 bytes written to BUFFER.) */
1601 utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
1607 unsigned long utf32;
1609 /* Compute the length of the UTF-8 sequence. */
/* One iteration per leading 1 bit of the first byte.  */
1610 for (t = *name; t & 0x80; t <<= 1)
/* Strip the length marker bits from the lead byte, keeping its payload.  */
1613 utf32 = *name & (0x7F >> ucn_len);
1614 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
/* Each following byte contributes its low six bits.  */
1616 utf32 = (utf32 << 6) | (*++name & 0x3F);
1618 /* Ill-formed UTF-8. */
/* A continuation byte must have the form 10xxxxxx.  */
1619 if ((*name & ~0x3F) != 0x80)
/* Eight lowercase hex digits, most significant nibble first.  */
1625 for (j = 7; j >= 0; j--)
1626 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
1631 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1632 already contain enough space to hold the token's spelling
   (cpp_token_len gives an upper bound).
1633 Returns a pointer to the character after the last character written.
1634 FORSTRING is true if this is to be the spelling after translation
1635 phase 1 (this is different for UCNs).
1636 FIXME: Would be nice if we didn't need the PFILE argument. */
1638 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1639 unsigned char *buffer, bool forstring)
/* Dispatch on the spelling category recorded in token_spellings.  */
1641 switch (TOKEN_SPELL (token))
1643 case SPELL_OPERATOR:
1645 const unsigned char *spelling;
/* Digraph spellings are indexed relative to CPP_FIRST_DIGRAPH.  */
1648 if (token->flags & DIGRAPH)
1650 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1651 else if (token->flags & NAMED_OP)
/* Ordinary operators are spelt from the token_spellings table.  */
1654 spelling = TOKEN_NAME (token);
1656 while ((c = *spelling++) != '\0')
/* Identifier, verbatim form: copy the node's name bytes unchanged.
   NOTE(review): the choice between this form and the escaping form
   below is presumably made on FORSTRING; confirm.  */
1665 memcpy (buffer, NODE_NAME (token->val.node),
1666 NODE_LEN (token->val.node));
1667 buffer += NODE_LEN (token->val.node);
/* Identifier, escaping form: bytes with the high bit set are handed
   to utf8_to_ucn, all others are copied as they are.  */
1672 const unsigned char * name = NODE_NAME (token->val.node);
1674 for (i = 0; i < NODE_LEN (token->val.node); i++)
1675 if (name[i] & ~0x7F)
/* utf8_to_ucn returns the bytes consumed; the loop's own i++ accounts
   for one of them, hence the - 1.  */
1677 i += utf8_to_ucn (buffer, name + i) - 1;
1681 *buffer++ = NODE_NAME (token->val.node)[i];
/* Literal: copy the stored text, val.str.len bytes.  */
1686 memcpy (buffer, token->val.str.text, token->val.str.len);
1687 buffer += token->val.str.len;
/* A token that cannot be spelt is reported at CPP_DL_ICE severity.  */
1691 cpp_error (pfile, CPP_DL_ICE,
1692 "unspellable token %s", TOKEN_NAME (token));
1699 /* Returns TOKEN spelt as a null-terminated string. The string is
1700 freed when the reader is destroyed. Useful for diagnostics.
   The storage comes from _cpp_unaligned_alloc and is sized from
   cpp_token_len; the spelling itself is produced by cpp_spell_token
   with FORSTRING false. */
1702 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
/* One byte more than the spelling bound, presumably for the
   terminating NUL.  */
1704 unsigned int len = cpp_token_len (token) + 1;
1705 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* END receives the position just past the last byte written.  */
1707 end = cpp_spell_token (pfile, token, start, false);
1713 /* Used by C front ends, which really should move to using
1714 cpp_token_as_text.
   Returns the name field of the token_spellings entry for TYPE: the
   operator text for entries built with OP, the stringified enumerator
   for entries built with TK.  TYPE indexes the table directly, with
   no range check. */
1716 cpp_type2name (enum cpp_ttype type)
1718 return (const char *) token_spellings[type].name;
1721 /* Writes the spelling of token to FP, without any preceding space.
1722 Separated from cpp_spell_token for efficiency - to avoid stdio
1723 double-buffering.
   The category dispatch matches cpp_spell_token, but the bytes go
   straight to FP instead of into a caller-supplied buffer. */
1725 cpp_output_token (const cpp_token *token, FILE *fp)
1727 switch (TOKEN_SPELL (token))
1729 case SPELL_OPERATOR:
1731 const unsigned char *spelling;
/* Digraph spellings are indexed relative to CPP_FIRST_DIGRAPH.  */
1734 if (token->flags & DIGRAPH)
1736 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1737 else if (token->flags & NAMED_OP)
/* Ordinary operators are spelt from the token_spellings table.  */
1740 spelling = TOKEN_NAME (token);
1745 while ((c = *++spelling) != '\0');
/* Identifier: bytes with the high bit set are written as UCN escapes,
   all other bytes are written unchanged.  */
1753 const unsigned char * name = NODE_NAME (token->val.node);
1755 for (i = 0; i < NODE_LEN (token->val.node); i++)
1756 if (name[i] & ~0x7F)
/* utf8_to_ucn always produces ten bytes, hence the fixed-size scratch
   buffer and the ten-byte fwrite.  Its return value is the number of
   name bytes consumed; the loop's i++ accounts for one of them.  */
1758 unsigned char buffer[10];
1759 i += utf8_to_ucn (buffer, name + i) - 1;
1760 fwrite (buffer, 1, 10, fp);
1763 fputc (NODE_NAME (token->val.node)[i], fp);
/* Literal: write the stored text, val.str.len bytes.  */
1768 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1772 /* An error, most probably. */
1777 /* Compare two tokens.  Both must have the same type and the same
   flags; what else is compared depends on the spelling category of A:
   the argument number for CPP_MACRO_ARG tokens, the node pointer, or
   the length and bytes of the string value. */
1779 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1781 if (a->type == b->type && a->flags == b->flags)
1782 switch (TOKEN_SPELL (a))
1784 default: /* Keep compiler happy. */
1785 case SPELL_OPERATOR:
/* Only macro arguments carry extra state here: their argument numbers
   must agree.  Any other token already matched on type and flags.  */
1788 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Nodes are compared by pointer, not by name.  */
1790 return a->val.node == b->val.node;
/* Strings are equal when both length and contents match.  */
1792 return (a->val.str.len == b->val.str.len
1793 && !memcmp (a->val.str.text, b->val.str.text,
1800 /* Returns nonzero if a space should be inserted to avoid an
1801 accidental token paste for output. For simplicity, it is
1802 conservative, and occasionally advises a space where one is not
1803 needed, e.g. "." and ".2".
   The decision is made from the type of TOKEN1 (A), the type of
   TOKEN2 (B) and, when TOKEN2 is a digraph or an operator, the first
   character of its spelling (C). */
1805 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1806 const cpp_token *token2)
1808 enum cpp_ttype a = token1->type, b = token2->type;
1811 if (token1->flags & NAMED_OP)
1813 if (token2->flags & NAMED_OP)
/* C is taken from the digraph table or from token_spellings, depending
   on how TOKEN2 is spelt.  */
1817 if (token2->flags & DIGRAPH)
1818 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1819 else if (token_spellings[b].category == SPELL_OPERATOR)
1820 c = token_spellings[b].name[0];
1822 /* Quickly get everything that can paste with an '='. */
1823 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case lists the first characters that must not directly follow
   a token of that type.  */
1828 case CPP_GREATER: return c == '>' || c == '?';
1829 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1830 case CPP_PLUS: return c == '+';
/* "--" or "->".  */
1831 case CPP_MINUS: return c == '-' || c == '>';
1832 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
/* "%:" and "%>" are digraphs.  */
1833 case CPP_MOD: return c == ':' || c == '>';
1834 case CPP_AND: return c == '&';
1835 case CPP_OR: return c == '|';
/* "::" or the ":>" digraph.  */
1836 case CPP_COLON: return c == ':' || c == '>';
1837 case CPP_DEREF: return c == '*';
1838 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1839 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1840 case CPP_NAME: return ((b == CPP_NUMBER
1841 && name_p (pfile, &token2->val.str))
1843 || b == CPP_CHAR || b == CPP_STRING); /* L */
1844 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1845 || c == '.' || c == '+' || c == '-');
/* CPP_OTHER tokens are inspected by their first text byte: a backslash,
   or '@' when the objc option is set.  */
1847 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1849 || (CPP_OPTION (pfile, objc)
1850 && token1->val.str.text[0] == '@'
1851 && (b == CPP_NAME || b == CPP_STRING)));
1858 /* Output all the remaining tokens on the current line, and a newline
1859 character, to FP. Leading whitespace is removed. If there are
1860 macros, special token padding is not performed.
   Tokens are fetched with cpp_get_token until CPP_EOF is seen and
   written with cpp_output_token. */
1862 cpp_output_line (cpp_reader *pfile, FILE *fp)
1864 const cpp_token *token;
1866 token = cpp_get_token (pfile);
1867 while (token->type != CPP_EOF)
1869 cpp_output_token (token, fp);
/* PREV_WHITE is tested only on a token fetched after one has been
   written, so the flag of the very first token is never examined;
   that is how leading whitespace is dropped.  */
1870 token = cpp_get_token (pfile);
1871 if (token->flags & PREV_WHITE)
1878 /* Memory buffers. Changing these three constants can have a dramatic
1879 effect on performance. The values here are reasonable defaults,
1880 but might be tuned. If you adjust them, be sure to test across a
1881 range of uses of cpplib, including heavy nested function-like macro
1882 expansion. Also check the change in peak memory usage (NJAMD is a
1883 good tool for this). */
/* Smallest data area new_buff will allocate.  */
1884 #define MIN_BUFF_SIZE 8000
/* Largest free-list buffer _cpp_get_buff will hand out for a request
   of MIN_SIZE bytes.  */
1885 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the bytes
   between BUFF's cur and limit.  MIN_EXTRA and the whole expansion are
   parenthesized so that an expression argument, or use inside a larger
   expression, cannot regroup the arithmetic.  */
1886 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1887 ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check on the two constants above.  */
1889 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1890 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1893 /* Create a new allocation buffer. Place the control block at the end
1894 of the buffer, so that buffer overflows will cause immediate chaos.
   LEN is the requested data size; it is raised to at least
   MIN_BUFF_SIZE and then passed through CPP_ALIGN. */
1896 new_buff (size_t len)
1899 unsigned char *base;
1901 if (len < MIN_BUFF_SIZE)
1902 len = MIN_BUFF_SIZE;
1903 len = CPP_ALIGN (len);
/* A single allocation holds LEN data bytes followed by the _cpp_buff
   control block.  */
1905 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1906 result = (_cpp_buff *) (base + len);
1907 result->base = base;
/* The data area ends exactly where the control block begins.  */
1909 result->limit = base + len;
1910 result->next = NULL;
1914 /* Place a chain of unwanted allocation buffers on the free list.
   BUFF becomes the new head of pfile->free_buffs and the previous
   free list is attached after END. */
1916 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1918 _cpp_buff *end = buff;
/* NOTE(review): END is expected to be the last buffer of the chain at
   this point; confirm.  */
1922 end->next = pfile->free_buffs;
1923 pfile->free_buffs = buff;
1926 /* Return a free buffer of size at least MIN_SIZE.
   The result is either a fresh buffer from new_buff (MIN_SIZE) or a
   buffer taken from pfile->free_buffs whose capacity (limit - base)
   lies between MIN_SIZE and BUFF_SIZE_UPPER_BOUND (MIN_SIZE).  A
   recycled buffer is returned with next cleared and cur reset to
   base. */
1928 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1930 _cpp_buff *result, **p;
/* P walks the links of the free list, not the buffers themselves.  */
1932 for (p = &pfile->free_buffs;; p = &(*p)->next)
1937 return new_buff (min_size);
1939 size = result->limit - result->base;
1940 /* Return a buffer that's big enough, but don't waste one that's
1942 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1947 result->next = NULL;
1948 result->cur = result->base;
1952 /* Creates a new buffer with enough space to hold the uncommitted
1953 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1954 the excess bytes to the new buffer. Chains the new buffer after
1955 BUFF, and returns the new buffer. */
1957 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1959 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
/* This local shadows the file-level function new_buff.  */
1960 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* BUFF's next link is overwritten, so whatever was chained after BUFF
   before is no longer reachable through it.  */
1962 buff->next = new_buff;
/* NOTE(review): BUFF_ROOM (buff) is presumably limit - cur; confirm
   against its definition.  */
1963 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1967 /* Creates a new buffer with enough space to hold the uncommitted
1968 remaining bytes of the buffer pointed to by PBUFF, and at least
1969 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1970 Chains the new buffer before the buffer pointed to by PBUFF, and
1971 updates the pointer to point to the new buffer. */
1973 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
/* The local new_buff shadows the file-level function of that name.  */
1975 _cpp_buff *new_buff, *old_buff = *pbuff;
1976 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1978 new_buff = _cpp_get_buff (pfile, size);
/* NOTE(review): BUFF_ROOM (old_buff) is presumably limit - cur;
   confirm against its definition.  */
1979 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
/* The old buffer stays reachable as the successor of the new one.  */
1980 new_buff->next = old_buff;
1984 /* Free a chain of buffers starting at BUFF.  A null BUFF is an empty
   chain. */
1986 _cpp_free_buff (_cpp_buff *buff)
/* The walk advances through a separately held NEXT pointer rather
   than reading buff->next in the loop header.  */
1990 for (; buff; buff = next)
1997 /* Allocate permanent, unaligned storage of length LEN.
   The bytes are carved from the front of the free space of
   pfile->u_buff by advancing its cur pointer. */
1999 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2001 _cpp_buff *buff = pfile->u_buff;
2002 unsigned char *result = buff->cur;
/* Not enough room left: fetch a buffer of at least LEN bytes and make
   it the new head of the u_buff chain, keeping the old one behind it.  */
2004 if (len > (size_t) (buff->limit - result))
2006 buff = _cpp_get_buff (pfile, len);
2007 buff->next = pfile->u_buff;
2008 pfile->u_buff = buff;
/* Claim LEN bytes starting at RESULT.  */
2012 buff->cur = result + len;
2016 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2017 That buffer is used for growing allocations when saving macro
2018 replacement lists in a #define, and when parsing an answer to an
2019 assertion in #assert, #unassert or #if (and therefore possibly
2020 whilst expanding macros). It therefore must not be used by any
2021 code that they might call: specifically the lexer and the guts of
2022 the macro expander.
2024 All existing other uses clearly fit this restriction: storing
2025 registered pragmas during initialization.
   NOTE(review): the function name says "aligned" while the text above
   says "unaligned"; the visible code does not round LEN.  Confirm
   which is intended. */
2027 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2029 _cpp_buff *buff = pfile->a_buff;
2030 unsigned char *result = buff->cur;
/* Not enough room left: fetch a buffer of at least LEN bytes and make
   it the new head of the a_buff chain, keeping the old one behind it.  */
2032 if (len > (size_t) (buff->limit - result))
2034 buff = _cpp_get_buff (pfile, len);
2035 buff->next = pfile->a_buff;
2036 pfile->a_buff = buff;
/* Claim LEN bytes starting at RESULT.  */
2040 buff->cur = result + len;
2044 /* Say which field of TOK is in use.  The answer is derived from the
   token's spelling category and, for three specific token types, from
   the type itself. */
2046 enum cpp_token_fld_kind
2047 cpp_token_val_index (cpp_token *tok)
2049 switch (TOKEN_SPELL (tok))
2052 return CPP_TOKEN_FLD_NODE;
2054 return CPP_TOKEN_FLD_STR;
/* Macro arguments, padding and pragma tokens each use a dedicated
   field; any other type here falls through to "none".  */
2056 if (tok->type == CPP_MACRO_ARG)
2057 return CPP_TOKEN_FLD_ARG_NO;
2058 else if (tok->type == CPP_PADDING)
2059 return CPP_TOKEN_FLD_SOURCE;
2060 else if (tok->type == CPP_PRAGMA)
2061 return CPP_TOKEN_FLD_STR;
2062 /* else fall through */
2064 return CPP_TOKEN_FLD_NONE;