1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
38 enum spell_type category;
39 const unsigned char *name;
42 static const unsigned char *const digraph_spellings[] =
43 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, U s },
46 #define TK(e, s) { SPELL_ ## s, U #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
70 /* Utility routine: compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical line. */
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
101 _cpp_clean_line (cpp_reader *pfile)
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 const uchar *pbackslash = NULL;
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
123 if (__builtin_expect (c == '\n', false)
124 || __builtin_expect (c == '\r', false))
128 if (__builtin_expect (s == buffer->rlimit, false))
131 /* DOS line ending? */
132 if (__builtin_expect (c == '\r', false)
136 if (s == buffer->rlimit)
140 if (__builtin_expect (pbackslash == NULL, true))
143 /* Check for escaped newline. */
145 while (is_nvspace (p[-1]))
147 if (p - 1 != pbackslash)
150 /* Have an escaped newline; process it and proceed to the slow path. */
152 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 buffer->next_line = p - 1;
157 if (__builtin_expect (c == '\\', false))
159 else if (__builtin_expect (c == '?', false)
160 && __builtin_expect (s[1] == '?', false)
161 && _cpp_trigraph_map[s[2]])
163 /* Have a trigraph. We may or may not have to convert
164 it. Add a line note regardless, for -Wtrigraphs. */
165 add_line_note (buffer, s, s[2]);
166 if (CPP_OPTION (pfile, trigraphs))
168 /* We do, and that means we have to switch to the slow path. */
171 *d = _cpp_trigraph_map[s[2]];
184 if (c == '\n' || c == '\r')
186 /* Handle DOS line endings. */
187 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 if (s == buffer->rlimit)
194 while (p != buffer->next_line && is_nvspace (p[-1]))
196 if (p == buffer->next_line || p[-1] != '\\')
199 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
201 buffer->next_line = p - 1;
203 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 /* Add a note regardless, for the benefit of -Wtrigraphs. */
206 add_line_note (buffer, d, s[2]);
207 if (CPP_OPTION (pfile, trigraphs))
209 *d = _cpp_trigraph_map[s[2]];
219 while (*s != '\n' && *s != '\r');
222 /* Handle DOS line endings. */
223 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
229 /* A sentinel note that should never be processed. */
230 add_line_note (buffer, d + 1, '\n');
231 buffer->next_line = s + 1;
234 /* Return true if the trigraph indicated by NOTE should be warned
235 about in a comment. */
237 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
241 /* Within comments we don't warn about trigraphs, unless the
242 trigraph forms an escaped newline, as that may change behavior. */
244 if (note->type != '/')
247 /* If -trigraphs, then this was an escaped newline iff the next note is coincident. */
249 if (CPP_OPTION (pfile, trigraphs))
250 return note[1].pos == note->pos;
252 /* Otherwise, see if this forms an escaped newline. */
254 while (is_nvspace (*p))
257 /* There might have been escaped newlines between the trigraph and the
258 newline we found. Hence the position test. */
259 return (*p == '\n' && p < note[1].pos);
262 /* Process the notes created by add_line_note as far as the current location. */
265 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267 cpp_buffer *buffer = pfile->buffer;
271 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
274 if (note->pos > buffer->cur)
278 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280 if (note->type == '\\' || note->type == ' ')
282 if (note->type == ' ' && !in_comment)
283 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
284 "backslash and newline separated by space");
286 if (buffer->next_line > buffer->rlimit)
288 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
289 "backslash-newline at end of file");
290 /* Prevent "no newline at end of file" warning. */
291 buffer->next_line = buffer->rlimit;
294 buffer->line_base = note->pos;
295 CPP_INCREMENT_LINE (pfile, 0);
297 else if (_cpp_trigraph_map[note->type])
299 if (CPP_OPTION (pfile, warn_trigraphs)
300 && (!in_comment || warn_in_comment (pfile, note)))
302 if (CPP_OPTION (pfile, trigraphs))
303 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
304 "trigraph ??%c converted to %c",
306 (int) _cpp_trigraph_map[note->type]);
310 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
311 "trigraph ??%c ignored, use -trigraphs to enable",
321 /* SDCC _asm specific */
322 /* Skip an _asm ... _endasm block. We find the end of the block by
323 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero otherwise. */
326 skip_asm_block (cpp_reader *pfile)
328 #define _ENDASM_STR "endasm"
329 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
331 cpp_buffer *buffer = pfile->buffer;
336 while (buffer->cur != buffer->rlimit)
338 prev_space = is_space(c);
341 if (prev_space && c == '_')
343 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
344 strncmp((char *)buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
346 buffer->cur += _ENDASM_LEN;
355 _cpp_process_line_notes (pfile, true);
356 if (buffer->next_line >= buffer->rlimit)
358 _cpp_clean_line (pfile);
360 cols = buffer->next_line - buffer->line_base;
361 CPP_INCREMENT_LINE (pfile, cols);
365 _cpp_process_line_notes (pfile, true);
369 /* Skip a C-style block comment. We find the end of the comment by
370 seeing if an asterisk is before every '/' we encounter. Returns
371 nonzero if comment terminated by EOF, zero otherwise.
373 Buffer->cur points to the initial asterisk of the comment. */
375 _cpp_skip_block_comment (cpp_reader *pfile)
377 cpp_buffer *buffer = pfile->buffer;
378 const uchar *cur = buffer->cur;
387 /* People like decorating comments with '*', so check for '/'
388 instead for efficiency. */
396 /* Warn about potential nested comments, but not if the '/'
397 comes immediately before the true comment delimiter.
398 Don't bother to get it right across escaped newlines. */
399 if (CPP_OPTION (pfile, warn_comments)
400 && cur[0] == '*' && cur[1] != '/')
403 cpp_error_with_line (pfile, CPP_DL_WARNING,
404 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
405 "\"/*\" within comment");
411 buffer->cur = cur - 1;
412 _cpp_process_line_notes (pfile, true);
413 if (buffer->next_line >= buffer->rlimit)
415 _cpp_clean_line (pfile);
417 cols = buffer->next_line - buffer->line_base;
418 CPP_INCREMENT_LINE (pfile, cols);
425 _cpp_process_line_notes (pfile, true);
429 /* Skip a C++ line comment, leaving buffer->cur pointing to the
430 terminating newline. Handles escaped newlines. Returns nonzero
431 if a multiline comment. */
433 skip_line_comment (cpp_reader *pfile)
435 cpp_buffer *buffer = pfile->buffer;
436 unsigned int orig_line = pfile->line_table->highest_line;
438 while (*buffer->cur != '\n')
441 _cpp_process_line_notes (pfile, true);
442 return orig_line != pfile->line_table->highest_line;
445 /* Skips whitespace, saving the next non-whitespace character. */
447 skip_whitespace (cpp_reader *pfile, cppchar_t c)
449 cpp_buffer *buffer = pfile->buffer;
450 bool saw_NUL = false;
454 /* Horizontal space always OK. */
455 if (c == ' ' || c == '\t')
457 /* Just \f \v or \0 left. */
460 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
461 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
462 CPP_BUF_COL (buffer),
463 "%s in preprocessing directive",
464 c == '\f' ? "form feed" : "vertical tab");
468 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
469 while (is_nvspace (c));
472 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
477 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
480 name_p (cpp_reader *pfile, const cpp_string *string)
484 for (i = 0; i < string->len; i++)
485 if (!is_idchar (string->text[i]))
491 /* After parsing an identifier or other sequence, produce a warning about
492 sequences not in NFC/NFKC.

PFILE supplies the warn_normalize option and the skipping state, TOKEN
is the token that was just lexed, and S is the normalization state
gathered while lexing it.  A warning is issued only when
NORMALIZE_STATE_RESULT (S) is greater than the configured
warn_normalize level and the reader is not skipping. */
494 warn_about_normalization (cpp_reader *pfile,
495 const cpp_token *token,
496 const struct normalize_state *s)
498 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
499 && !pfile->state.skipping)
501 /* Make sure that the token is printed using UCNs, even
502 if we'd otherwise happily print UTF-8. */
/* NOTE(review): BUF is obtained with XNEWVEC and no matching release is
visible after the diagnostics below -- confirm it is freed, otherwise
every warning issued here leaks the spelling buffer. */
503 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
/* cpp_spell_token's return value minus BUF gives the spelled length,
which bounds the %.*s output below. */
506 sz = cpp_spell_token (pfile, token, buf, false) - buf;
/* The wording depends on the state result: normalized_C is reported as
"not in NFKC", the other wording is "not in NFC". */
507 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
508 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
509 "`%.*s' is not in NFKC", (int) sz, buf);
511 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
512 "`%.*s' is not in NFC", (int) sz, buf);
516 /* Returns TRUE if the sequence starting at buffer->cur is valid in
517 an identifier. FIRST is TRUE if this starts an identifier. */
519 forms_identifier_p (cpp_reader *pfile, int first,
520 struct normalize_state *state)
522 cpp_buffer *buffer = pfile->buffer;
524 if (*buffer->cur == '$')
526 if (!CPP_OPTION (pfile, dollars_in_ident))
530 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
532 CPP_OPTION (pfile, warn_dollars) = 0;
533 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
539 /* Is this a syntactically valid UCN? */
540 if (CPP_OPTION (pfile, extended_identifiers)
541 && *buffer->cur == '\\'
542 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
545 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
554 /* Lex an identifier starting at BUFFER->CUR - 1. */
555 static cpp_hashnode *
556 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
557 struct normalize_state *nst)
559 cpp_hashnode *result;
562 unsigned int hash = HT_HASHSTEP (0, *base);
564 cur = pfile->buffer->cur;
566 while (ISIDNUM (*cur))
568 hash = HT_HASHSTEP (hash, *cur);
571 pfile->buffer->cur = cur;
572 if (starts_ucn || forms_identifier_p (pfile, false, nst))
574 /* Slower version for identifiers containing UCNs (or $). */
576 while (ISIDNUM (*pfile->buffer->cur))
578 pfile->buffer->cur++;
579 NORMALIZE_STATE_UPDATE_IDNUM (nst);
581 } while (forms_identifier_p (pfile, false, nst));
582 result = _cpp_interpret_identifier (pfile, base,
583 pfile->buffer->cur - base);
588 hash = HT_HASHFINISH (hash, len);
590 result = (cpp_hashnode *)
591 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
594 /* Rarely, identifiers require diagnostics when lexed. */
595 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
596 && !pfile->state.skipping, 0))
598 /* It is allowed to poison the same identifier twice. */
599 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
600 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
603 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
604 replacement list of a variadic macro. */
605 if (result == pfile->spec_nodes.n__VA_ARGS__
606 && !pfile->state.va_args_ok)
607 cpp_error (pfile, CPP_DL_PEDWARN,
608 "__VA_ARGS__ can only appear in the expansion"
609 " of a C99 variadic macro");
616 /* Pedantically parse a number. The first character, which may be a
617 leading '.', has already been consumed and is re-read from BUFFER->CUR - 1;
618 later characters are read straight from the buffer. Place the result in NUMBER. */
620 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
622 #define get_effective_char(pfile) (*pfile->buffer->cur++)
623 #define BACKUP() (--pfile->buffer->cur)
625 enum num_type_e { NT_DEC, NT_HEX, NT_BIN } num_type = NT_DEC;
626 enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
628 uchar c = *(pfile->buffer->cur - 1);
629 struct obstack *stack = &pfile->hash_table->stack;
638 obstack_1grow (stack, '.');
639 c = get_effective_char (pfile);
647 obstack_1grow (stack, c);
648 c = get_effective_char (pfile);
656 obstack_1grow (stack, c);
657 c = get_effective_char (pfile);
662 if (!CPP_OPTION (pfile, std))
666 obstack_1grow (stack, c);
667 c = get_effective_char (pfile);
674 obstack_1grow (stack, c);
675 c = get_effective_char (pfile);
686 if (NT_DEC == num_type)
692 obstack_1grow (stack, c);
693 c = get_effective_char (pfile);
700 obstack_1grow (stack, c);
701 c = get_effective_char (pfile);
704 else if ('E' == c || 'e' == c)
706 if (has_whole || has_fract)
710 obstack_1grow (stack, c);
711 c = get_effective_char (pfile);
718 else if (NT_HEX == num_type)
724 obstack_1grow (stack, c);
725 c = get_effective_char (pfile);
732 obstack_1grow (stack, c);
733 c = get_effective_char (pfile);
736 else if ('P' == c || 'p' == c)
738 if (has_whole || has_fract)
742 obstack_1grow (stack, c);
743 c = get_effective_char (pfile);
750 else /* (NT_BIN == num_type) */
752 while ((c=='0') || (c=='1'))
756 obstack_1grow (stack, c);
757 c = get_effective_char (pfile);
764 obstack_1grow (stack, c);
765 c = get_effective_char (pfile);
768 else if ('P' == c || 'p' == c)
770 if (has_whole || has_fract)
774 obstack_1grow (stack, c);
775 c = get_effective_char (pfile);
782 num_part = NP_INT_SUFFIX;
786 if (NT_DEC == num_type)
792 obstack_1grow (stack, c);
793 c = get_effective_char (pfile);
796 if ('E' == c || 'e' == c)
798 if (has_whole || has_fract)
802 obstack_1grow (stack, c);
803 c = get_effective_char (pfile);
814 obstack_1grow (stack, c);
815 c = get_effective_char (pfile);
818 if ('P' == c || 'p' == c)
820 if (has_whole || has_fract)
824 obstack_1grow (stack, c);
825 c = get_effective_char (pfile);
830 num_part = NP_FLOAT_SUFFIX;
834 if ('+' == c || '-' == c)
837 obstack_1grow (stack, c);
838 c = get_effective_char (pfile);
844 obstack_1grow (stack, c);
845 c = get_effective_char (pfile);
848 num_part = NP_FLOAT_SUFFIX;
852 if ('L' == c || 'l' == c)
857 obstack_1grow (stack, c);
858 c = get_effective_char (pfile);
863 obstack_1grow (stack, c);
864 c = get_effective_char (pfile);
867 else if ('U' == c || 'u' == c)
870 obstack_1grow (stack, c);
871 c = get_effective_char (pfile);
875 case NP_FLOAT_SUFFIX:
876 if ('F' == c || 'f' == c)
879 obstack_1grow (stack, c);
880 c = get_effective_char (pfile);
882 else if ('L' == c || 'l' == c)
885 obstack_1grow (stack, c);
886 c = get_effective_char (pfile);
893 /* Step back over the unwanted char. */
896 number->text = obstack_finish (stack);
900 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
902 lex_number (cpp_reader *pfile, cpp_string *number,
903 struct normalize_state *nst)
909 base = pfile->buffer->cur - 1;
912 cur = pfile->buffer->cur;
914 /* N.B. ISIDNUM does not include $. */
915 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
918 NORMALIZE_STATE_UPDATE_IDNUM (nst);
921 pfile->buffer->cur = cur;
923 while (forms_identifier_p (pfile, false, nst));
925 number->len = cur - base;
926 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
927 memcpy (dest, base, number->len);
928 dest[number->len] = '\0';
932 /* Create a token of type TYPE with a literal spelling. */
934 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
935 unsigned int len, enum cpp_ttype type)
937 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
939 memcpy (dest, base, len);
942 token->val.str.len = len;
943 token->val.str.text = dest;
946 /* Lexes a string, character constant, or angle-bracketed header file
947 name. The stored string contains the spelling, including opening
948 quote and any leading 'L'. The token is given the type of the
949 literal, or CPP_OTHER if it was not properly terminated.
951 The spelling is NUL-terminated, but it is not guaranteed that this
952 is the first NUL since embedded NULs are preserved. */
954 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
956 bool saw_NUL = false;
958 cppchar_t terminator;
963 if (terminator == 'L')
965 if (terminator == '\"')
966 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
967 else if (terminator == '\'')
968 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
970 terminator = '>', type = CPP_HEADER_NAME;
974 cppchar_t c = *cur++;
976 /* In #include-style directives, terminators are not escapable. */
977 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
979 else if (c == terminator)
991 if (saw_NUL && !pfile->state.skipping)
992 cpp_error (pfile, CPP_DL_WARNING,
993 "null character(s) preserved in literal");
995 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
996 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
999 pfile->buffer->cur = cur;
1000 create_literal (pfile, token, base, cur - base, type);
1003 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
1004 comment blocks (when executed with -C option) and
1005 _asm (SDCPP specific) blocks */
1007 /* Count and copy characters from src to dest, excluding CRs:
1008 CRs are automatically generated, because the output is
1009 opened in TEXT mode. If dest == NULL, only count chars */
1011 copy_text_chars (unsigned char *dest, const unsigned char *src, unsigned int len)
1014 const unsigned char *p;
1016 for (p = src; p != src + len; ++p)
1031 /* SDCC _asm specific */
1032 /* The stored block starts with '_asm', followed by the text from FROM up to the current buffer position with CRs removed. */
1034 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
1036 #define _ASM_STR "_asm"
1037 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
1039 unsigned char *buffer;
1040 unsigned int text_len, len;
1042 len = pfile->buffer->cur - from;
1043 /* + _ASM_LEN for the initial '_asm'. */
1044 text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
1045 buffer = _cpp_unaligned_alloc (pfile, text_len);
1048 token->type = CPP_ASM;
1049 token->val.str.len = text_len;
1050 token->val.str.text = buffer;
1052 memcpy (buffer, _ASM_STR, _ASM_LEN);
1053 copy_text_chars (buffer + _ASM_LEN, from, len);
1056 /* The stored comment includes the comment start and any terminator.

Build a CPP_COMMENT token in TOKEN from the comment text that begins at
FROM and ends at PFILE's current buffer position.  FROM does not cover
the comment's initial '/', so that character is counted separately in
LEN.  When lexing inside a directive and TYPE is '/', two extra bytes
are reserved so that the stored text can be closed in C style. */
1058 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1061 unsigned char *buffer;
1062 unsigned int len, clen;
1064 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1066 /* C++ comments probably (not definitely) have moved past a new
1067 line, which we don't want to save in the comment. */
1068 if (is_vspace (pfile->buffer->cur[-1]))
1071 /* If we are currently in a directive, then we need to store all
1072 C++ comments as C comments internally, and so we need to
1073 allocate a little extra space in that case.
1075 Note that the only time we encounter a directive here is
1076 when we are saving comments in a "#define". */
1077 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1079 buffer = _cpp_unaligned_alloc (pfile, clen);
/* NOTE(review): the recorded length is CLEN, which is derived from the
raw byte span rather than from what copy_text_chars actually stores.
save_asm sizes its buffer from copy_text_chars (NULL, ...) instead;
confirm whether dropped CRs can leave unset bytes inside this token. */
1081 token->type = CPP_COMMENT;
1082 token->val.str.len = clen;
1083 token->val.str.text = buffer;
/* NOTE(review): LEN already counts the initial '/', yet LEN bytes are
requested here starting at buffer + 1, while only CLEN bytes were
allocated (CLEN == LEN when no C-style conversion applies).  Unless
copy_text_chars drops at least one CR this looks like a one-byte
overrun of BUFFER and a one-byte over-read of FROM; confirm whether
the count should be LEN - 1. */
1086 copy_text_chars (buffer + 1, from, len);
1088 /* Finish conversion to a C comment, if necessary. */
1089 if (pfile->state.in_directive && type == '/')
1092 buffer[clen - 2] = '*';
1093 buffer[clen - 1] = '/';
1097 /* Allocate COUNT tokens for RUN. */
1099 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1101 run->base = XNEWVEC (cpp_token, count);
1102 run->limit = run->base + count;
1106 /* Returns the next tokenrun, or creates one if there is none. */
1108 next_tokenrun (tokenrun *run)
1110 if (run->next == NULL)
1112 run->next = XNEW (tokenrun);
1113 run->next->prev = run;
1114 _cpp_init_tokenrun (run->next, 250);
1120 /* Allocate a single token that is invalidated at the same time as the
1121 rest of the tokens on the line. Has its line and col set to the
1122 same as the last lexed token, so that diagnostics appear in the right place. */
1125 _cpp_temp_token (cpp_reader *pfile)
1127 cpp_token *old, *result;
1129 old = pfile->cur_token - 1;
1130 if (pfile->cur_token == pfile->cur_run->limit)
1132 pfile->cur_run = next_tokenrun (pfile->cur_run);
1133 pfile->cur_token = pfile->cur_run->base;
1136 result = pfile->cur_token++;
1137 result->src_loc = old->src_loc;
1141 /* Lex a token into RESULT (external interface). Takes care of issues
1142 like directive handling, token lookahead, multiple include
1143 optimization and skipping. */
1145 _cpp_lex_token (cpp_reader *pfile)
1151 if (pfile->cur_token == pfile->cur_run->limit)
1153 pfile->cur_run = next_tokenrun (pfile->cur_run);
1154 pfile->cur_token = pfile->cur_run->base;
1156 /* We assume that the current token is somewhere in the current run. */
1158 if (pfile->cur_token < pfile->cur_run->base
1159 || pfile->cur_token >= pfile->cur_run->limit)
1162 if (pfile->lookaheads)
1164 pfile->lookaheads--;
1165 result = pfile->cur_token++;
1168 result = _cpp_lex_direct (pfile);
1170 if (result->flags & BOL)
1172 /* Is this a directive? If _cpp_handle_directive returns
1173 false, it is an assembler #. */
1174 if (result->type == CPP_HASH
1175 /* 6.10.3 p 11: Directives in a list of macro arguments
1176 gives undefined behavior. This implementation
1177 handles the directive as normal. */
1178 && pfile->state.parsing_args != 1)
1180 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1182 if (pfile->directive_result.type == CPP_PADDING)
1184 result = &pfile->directive_result;
1187 else if (pfile->state.in_deferred_pragma)
1188 result = &pfile->directive_result;
1190 if (pfile->cb.line_change && !pfile->state.skipping)
1191 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1194 /* We don't skip tokens in directives. */
1195 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1198 /* Outside a directive, invalidate controlling macros. At file
1199 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1200 get here and MI optimization works. */
1201 pfile->mi_valid = false;
1203 if (!pfile->state.skipping || result->type == CPP_EOF)
1210 /* Returns true if a fresh line has been loaded. */
1212 _cpp_get_fresh_line (cpp_reader *pfile)
1216 /* We can't get a new line until we leave the current directive. */
1217 if (pfile->state.in_directive)
1222 cpp_buffer *buffer = pfile->buffer;
1224 if (!buffer->need_line)
1227 if (buffer->next_line < buffer->rlimit)
1229 _cpp_clean_line (pfile);
1233 /* First, get out of parsing arguments state. */
1234 if (pfile->state.parsing_args)
1237 /* End of buffer. Non-empty files should end in a newline. */
1238 if (buffer->buf != buffer->rlimit
1239 && buffer->next_line > buffer->rlimit
1240 && !buffer->from_stage3)
1242 /* Clip to buffer size. */
1243 buffer->next_line = buffer->rlimit;
1246 return_at_eof = buffer->return_at_eof;
1247 _cpp_pop_buffer (pfile);
1248 if (pfile->buffer == NULL || return_at_eof)
1253 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1256 result->type = ELSE_TYPE; \
1257 if (*buffer->cur == CHAR) \
1258 buffer->cur++, result->type = THEN_TYPE; \
1262 /* Lex a token into pfile->cur_token, which is also incremented, to
1263 get diagnostics pointing to the correct location.
1265 Does not handle issues such as token lookahead, multiple-include
1266 optimization, directives, skipping etc. This function is only
1267 suitable for use by _cpp_lex_token, and in special cases like
1268 lex_expansion_token which doesn't care for any of these issues.
1270 When meeting a newline, returns CPP_EOF if parsing a directive,
1271 otherwise returns to the start of the token buffer if permissible.
1272 Returns the location of the lexed token. */
1274 _cpp_lex_direct (cpp_reader *pfile)
1278 const unsigned char *comment_start;
1279 cpp_token *result = pfile->cur_token++;
1283 buffer = pfile->buffer;
1284 if (buffer->need_line)
1286 if (pfile->state.in_deferred_pragma)
1288 result->type = CPP_PRAGMA_EOL;
1289 pfile->state.in_deferred_pragma = false;
1290 if (!pfile->state.pragma_allow_expansion)
1291 pfile->state.prevent_expansion--;
1294 if (!_cpp_get_fresh_line (pfile))
1296 result->type = CPP_EOF;
1297 if (!pfile->state.in_directive)
1299 /* Tell the compiler the line number of the EOF token. */
1300 result->src_loc = pfile->line_table->highest_line;
1301 result->flags = BOL;
1305 if (!pfile->keep_tokens)
1307 pfile->cur_run = &pfile->base_run;
1308 result = pfile->base_run.base;
1309 pfile->cur_token = result + 1;
1311 result->flags = BOL;
1312 if (pfile->state.parsing_args == 2)
1313 result->flags |= PREV_WHITE;
1315 buffer = pfile->buffer;
1317 result->src_loc = pfile->line_table->highest_line;
1320 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1321 && !pfile->overlaid_buffer)
1323 _cpp_process_line_notes (pfile, false);
1324 result->src_loc = pfile->line_table->highest_line;
1328 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1329 CPP_BUF_COLUMN (buffer, buffer->cur));
1333 case ' ': case '\t': case '\f': case '\v': case '\0':
1334 result->flags |= PREV_WHITE;
1335 skip_whitespace (pfile, c);
1339 if (buffer->cur < buffer->rlimit)
1340 CPP_INCREMENT_LINE (pfile, 0);
1341 buffer->need_line = true;
1344 case '0': case '1': case '2': case '3': case '4':
1345 case '5': case '6': case '7': case '8': case '9':
1347 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1348 result->type = CPP_NUMBER;
1349 if (CPP_OPTION (pfile, pedantic_parse_number))
1350 pedantic_lex_number (pfile, &result->val.str);
1352 lex_number (pfile, &result->val.str, &nst);
1353 warn_about_normalization (pfile, result, &nst);
1358 /* 'L' may introduce wide characters or strings. */
1359 if (*buffer->cur == '\'' || *buffer->cur == '"')
1361 lex_string (pfile, result, buffer->cur - 1);
1367 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1368 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1369 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1370 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1372 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1373 case 'G': case 'H': case 'I': case 'J': case 'K':
1374 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1375 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1377 result->type = CPP_NAME;
1379 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1380 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1382 warn_about_normalization (pfile, result, &nst);
1385 /* SDCC _asm specific */
1386 /* handle _asm ... _endasm ; */
1387 if (CPP_OPTION (pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1389 comment_start = buffer->cur;
1390 result->type = CPP_ASM;
1391 skip_asm_block (pfile);
1392 /* Save the _asm block as a token in its own right. */
1393 save_asm (pfile, result, comment_start);
1395 /* Convert named operators to their proper types. */
1396 else if (result->val.node->flags & NODE_OPERATOR)
1398 result->flags |= NAMED_OP;
1399 result->type = (enum cpp_ttype) result->val.node->directive_index;
1405 lex_string (pfile, result, buffer->cur - 1);
1409 /* A potential block or line comment. */
1410 comment_start = buffer->cur;
1415 if (_cpp_skip_block_comment (pfile))
1416 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1418 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1419 || cpp_in_system_header (pfile)))
1421 /* Warn about comments only if pedantically GNUC89, and not
1422 in system headers. */
1423 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1424 && ! buffer->warned_cplusplus_comments)
1426 cpp_error (pfile, CPP_DL_PEDWARN,
1427 "C++ style comments are not allowed in ISO C90");
1428 cpp_error (pfile, CPP_DL_PEDWARN,
1429 "(this will be reported only once per input file)");
1430 buffer->warned_cplusplus_comments = 1;
1433 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1434 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1439 result->type = CPP_DIV_EQ;
1444 result->type = CPP_DIV;
1448 if (!pfile->state.save_comments)
1450 result->flags |= PREV_WHITE;
1451 goto update_tokens_line;
1454 /* Save the comment as a token in its own right. */
1455 save_comment (pfile, result, comment_start, c);
1459 if (pfile->state.angled_headers)
1461 lex_string (pfile, result, buffer->cur - 1);
1465 result->type = CPP_LESS;
1466 if (*buffer->cur == '=')
1467 buffer->cur++, result->type = CPP_LESS_EQ;
1468 else if (*buffer->cur == '<')
1471 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1473 else if (CPP_OPTION (pfile, digraphs))
1475 if (*buffer->cur == ':')
1478 result->flags |= DIGRAPH;
1479 result->type = CPP_OPEN_SQUARE;
1481 else if (*buffer->cur == '%')
1484 result->flags |= DIGRAPH;
1485 result->type = CPP_OPEN_BRACE;
1491 result->type = CPP_GREATER;
1492 if (*buffer->cur == '=')
1493 buffer->cur++, result->type = CPP_GREATER_EQ;
1494 else if (*buffer->cur == '>')
1497 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1502 result->type = CPP_MOD;
1503 if (*buffer->cur == '=')
1504 buffer->cur++, result->type = CPP_MOD_EQ;
1505 else if (CPP_OPTION (pfile, digraphs))
1507 if (*buffer->cur == ':')
1510 result->flags |= DIGRAPH;
1511 result->type = CPP_HASH;
1512 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1513 buffer->cur += 2, result->type = CPP_PASTE;
1515 else if (*buffer->cur == '>')
1518 result->flags |= DIGRAPH;
1519 result->type = CPP_CLOSE_BRACE;
1525 result->type = CPP_DOT;
1526 if (ISDIGIT (*buffer->cur))
1528 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1529 result->type = CPP_NUMBER;
1530 if (CPP_OPTION (pfile, pedantic_parse_number))
1531 pedantic_lex_number (pfile, &result->val.str);
1533 lex_number (pfile, &result->val.str, &nst);
1534 warn_about_normalization (pfile, result, &nst);
1536 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1537 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1538 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1539 buffer->cur++, result->type = CPP_DOT_STAR;
1543 result->type = CPP_PLUS;
1544 if (*buffer->cur == '+')
1545 buffer->cur++, result->type = CPP_PLUS_PLUS;
1546 else if (*buffer->cur == '=')
1547 buffer->cur++, result->type = CPP_PLUS_EQ;
1551 result->type = CPP_MINUS;
1552 if (*buffer->cur == '>')
1555 result->type = CPP_DEREF;
1556 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1557 buffer->cur++, result->type = CPP_DEREF_STAR;
1559 else if (*buffer->cur == '-')
1560 buffer->cur++, result->type = CPP_MINUS_MINUS;
1561 else if (*buffer->cur == '=')
1562 buffer->cur++, result->type = CPP_MINUS_EQ;
1566 result->type = CPP_AND;
1567 if (*buffer->cur == '&')
1568 buffer->cur++, result->type = CPP_AND_AND;
1569 else if (*buffer->cur == '=')
1570 buffer->cur++, result->type = CPP_AND_EQ;
1574 result->type = CPP_OR;
1575 if (*buffer->cur == '|')
1576 buffer->cur++, result->type = CPP_OR_OR;
1577 else if (*buffer->cur == '=')
1578 buffer->cur++, result->type = CPP_OR_EQ;
1582 result->type = CPP_COLON;
1583 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1584 buffer->cur++, result->type = CPP_SCOPE;
1585 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1588 result->flags |= DIGRAPH;
1589 result->type = CPP_CLOSE_SQUARE;
1593 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1594 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1595 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1596 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1597 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1599 case '?': result->type = CPP_QUERY; break;
1600 case '~': result->type = CPP_COMPL; break;
1601 case ',': result->type = CPP_COMMA; break;
1602 case '(': result->type = CPP_OPEN_PAREN; break;
1603 case ')': result->type = CPP_CLOSE_PAREN; break;
1604 case '[': result->type = CPP_OPEN_SQUARE; break;
1605 case ']': result->type = CPP_CLOSE_SQUARE; break;
1606 case '{': result->type = CPP_OPEN_BRACE; break;
1607 case '}': result->type = CPP_CLOSE_BRACE; break;
1608 case ';': result->type = CPP_SEMICOLON; break;
1610 /* @ is a punctuator in Objective-C. */
1611 case '@': result->type = CPP_ATSIGN; break;
1616 const uchar *base = --buffer->cur;
1617 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1619 if (forms_identifier_p (pfile, true, &nst))
1621 result->type = CPP_NAME;
1622 result->val.node = lex_identifier (pfile, base, true, &nst);
1623 warn_about_normalization (pfile, result, &nst);
1630 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1637 /* An upper bound on the number of bytes needed to spell TOKEN.
1638 Does not include preceding whitespace. */
1640 cpp_token_len (const cpp_token *token)
1644 switch (TOKEN_SPELL (token))
1646 default: len = 4; break;
1647 case SPELL_LITERAL: len = token->val.str.len; break;
1648 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
/* Parse one UTF-8 sequence starting at NAME and place the matching
   \U escape in BUFFER: a backslash, 'U', and eight lower-case
   hexadecimal digits.  There are always 10 bytes written to BUFFER,
   and no terminating NUL is added.  Return the number of bytes read
   out of NAME.  Aborts if a trailing byte does not have the form
   10xxxxxx.  */
static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it is the number of
     leading one bits in the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* Whatever follows the length prefix in the first byte is payload.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      /* Each trailing byte contributes its low six bits.  */
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Emit the eight nibbles, most significant first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1689 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1690 already contain the enough space to hold the token's spelling.
1691 Returns a pointer to the character after the last character written.
1692 FORSTRING is true if this is to be the spelling after translation
1693 phase 1 (this is different for UCNs).
1694 FIXME: Would be nice if we didn't need the PFILE argument. */
1696 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1697 unsigned char *buffer, bool forstring)
1699 switch (TOKEN_SPELL (token))
1701 case SPELL_OPERATOR:
1703 const unsigned char *spelling;
1706 if (token->flags & DIGRAPH)
1708 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1709 else if (token->flags & NAMED_OP)
1712 spelling = TOKEN_NAME (token);
1714 while ((c = *spelling++) != '\0')
1723 memcpy (buffer, NODE_NAME (token->val.node),
1724 NODE_LEN (token->val.node));
1725 buffer += NODE_LEN (token->val.node);
1730 const unsigned char * name = NODE_NAME (token->val.node);
1732 for (i = 0; i < NODE_LEN (token->val.node); i++)
1733 if (name[i] & ~0x7F)
1735 i += utf8_to_ucn (buffer, name + i) - 1;
1739 *buffer++ = NODE_NAME (token->val.node)[i];
1744 memcpy (buffer, token->val.str.text, token->val.str.len);
1745 buffer += token->val.str.len;
1749 cpp_error (pfile, CPP_DL_ICE,
1750 "unspellable token %s", TOKEN_NAME (token));
1757 /* Returns TOKEN spelt as a null-terminated string. The string is
1758 freed when the reader is destroyed. Useful for diagnostics. */
1760 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1762 unsigned int len = cpp_token_len (token) + 1;
1763 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1765 end = cpp_spell_token (pfile, token, start, false);
1771 /* Used by C front ends, which really should move to using
1772 cpp_token_as_text. */
1774 cpp_type2name (enum cpp_ttype type)
1776 return (const char *) token_spellings[type].name;
1779 /* Writes the spelling of token to FP, without any preceding space.
1780 Separated from cpp_spell_token for efficiency - to avoid stdio
1781 double-buffering. */
1783 cpp_output_token (const cpp_token *token, FILE *fp)
1785 switch (TOKEN_SPELL (token))
1787 case SPELL_OPERATOR:
1789 const unsigned char *spelling;
1792 if (token->flags & DIGRAPH)
1794 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1795 else if (token->flags & NAMED_OP)
1798 spelling = TOKEN_NAME (token);
1803 while ((c = *++spelling) != '\0');
1811 const unsigned char * name = NODE_NAME (token->val.node);
1813 for (i = 0; i < NODE_LEN (token->val.node); i++)
1814 if (name[i] & ~0x7F)
1816 unsigned char buffer[10];
1817 i += utf8_to_ucn (buffer, name + i) - 1;
1818 fwrite (buffer, 1, 10, fp);
1821 fputc (NODE_NAME (token->val.node)[i], fp);
1826 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1830 /* An error, most probably. */
1835 /* Compare two tokens. */
1837 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1839 if (a->type == b->type && a->flags == b->flags)
1840 switch (TOKEN_SPELL (a))
1842 default: /* Keep compiler happy. */
1843 case SPELL_OPERATOR:
1846 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1848 return a->val.node == b->val.node;
1850 return (a->val.str.len == b->val.str.len
1851 && !memcmp (a->val.str.text, b->val.str.text,
1858 /* Returns nonzero if a space should be inserted to avoid an
1859 accidental token paste for output. For simplicity, it is
1860 conservative, and occasionally advises a space where one is not
1861 needed, e.g. "." and ".2". */
1863 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1864 const cpp_token *token2)
1866 enum cpp_ttype a = token1->type, b = token2->type;
1869 if (token1->flags & NAMED_OP)
1871 if (token2->flags & NAMED_OP)
1875 if (token2->flags & DIGRAPH)
1876 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1877 else if (token_spellings[b].category == SPELL_OPERATOR)
1878 c = token_spellings[b].name[0];
1880 /* Quickly get everything that can paste with an '='. */
1881 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1886 case CPP_GREATER: return c == '>';
1887 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1888 case CPP_PLUS: return c == '+';
1889 case CPP_MINUS: return c == '-' || c == '>';
1890 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1891 case CPP_MOD: return c == ':' || c == '>';
1892 case CPP_AND: return c == '&';
1893 case CPP_OR: return c == '|';
1894 case CPP_COLON: return c == ':' || c == '>';
1895 case CPP_DEREF: return c == '*';
1896 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1897 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1898 case CPP_NAME: return ((b == CPP_NUMBER
1899 && name_p (pfile, &token2->val.str))
1901 || b == CPP_CHAR || b == CPP_STRING); /* L */
1902 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1903 || c == '.' || c == '+' || c == '-');
1905 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1907 || (CPP_OPTION (pfile, objc)
1908 && token1->val.str.text[0] == '@'
1909 && (b == CPP_NAME || b == CPP_STRING)));
1916 /* Output all the remaining tokens on the current line, and a newline
1917 character, to FP. Leading whitespace is removed. If there are
1918 macros, special token padding is not performed. */
1920 cpp_output_line (cpp_reader *pfile, FILE *fp)
1922 const cpp_token *token;
1924 token = cpp_get_token (pfile);
1925 while (token->type != CPP_EOF)
1927 cpp_output_token (token, fp);
1928 token = cpp_get_token (pfile);
1929 if (token->flags & PREV_WHITE)
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the distance from BUFF's
   cur to its limit.  Both arguments are parenthesized so that
   arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1951 /* Create a new allocation buffer. Place the control block at the end
1952 of the buffer, so that buffer overflows will cause immediate chaos. */
1954 new_buff (size_t len)
1957 unsigned char *base;
1959 if (len < MIN_BUFF_SIZE)
1960 len = MIN_BUFF_SIZE;
1961 len = CPP_ALIGN (len);
1963 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1964 result = (_cpp_buff *) (base + len);
1965 result->base = base;
1967 result->limit = base + len;
1968 result->next = NULL;
1972 /* Place a chain of unwanted allocation buffers on the free list. */
1974 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1976 _cpp_buff *end = buff;
1980 end->next = pfile->free_buffs;
1981 pfile->free_buffs = buff;
1984 /* Return a free buffer of size at least MIN_SIZE. */
1986 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1988 _cpp_buff *result, **p;
1990 for (p = &pfile->free_buffs;; p = &(*p)->next)
1995 return new_buff (min_size);
1997 size = result->limit - result->base;
1998 /* Return a buffer that's big enough, but don't waste one that's
2000 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2005 result->next = NULL;
2006 result->cur = result->base;
2010 /* Creates a new buffer with enough space to hold the uncommitted
2011 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2012 the excess bytes to the new buffer. Chains the new buffer after
2013 BUFF, and returns the new buffer. */
2015 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2017 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2018 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2020 buff->next = new_buff;
2021 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2025 /* Creates a new buffer with enough space to hold the uncommitted
2026 remaining bytes of the buffer pointed to by BUFF, and at least
2027 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2028 Chains the new buffer before the buffer pointed to by BUFF, and
2029 updates the pointer to point to the new buffer. */
2031 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2033 _cpp_buff *new_buff, *old_buff = *pbuff;
2034 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2036 new_buff = _cpp_get_buff (pfile, size);
2037 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2038 new_buff->next = old_buff;
2042 /* Free a chain of buffers starting at BUFF. */
2044 _cpp_free_buff (_cpp_buff *buff)
2048 for (; buff; buff = next)
2055 /* Allocate permanent, unaligned storage of length LEN. */
2057 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2059 _cpp_buff *buff = pfile->u_buff;
2060 unsigned char *result = buff->cur;
2062 if (len > (size_t) (buff->limit - result))
2064 buff = _cpp_get_buff (pfile, len);
2065 buff->next = pfile->u_buff;
2066 pfile->u_buff = buff;
2070 buff->cur = result + len;
2074 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2075 That buffer is used for growing allocations when saving macro
2076 replacement lists in a #define, and when parsing an answer to an
2077 assertion in #assert, #unassert or #if (and therefore possibly
2078 whilst expanding macros). It therefore must not be used by any
2079 code that they might call: specifically the lexer and the guts of
2082 All existing other uses clearly fit this restriction: storing
2083 registered pragmas during initialization. */
2085 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2087 _cpp_buff *buff = pfile->a_buff;
2088 unsigned char *result = buff->cur;
2090 if (len > (size_t) (buff->limit - result))
2092 buff = _cpp_get_buff (pfile, len);
2093 buff->next = pfile->a_buff;
2094 pfile->a_buff = buff;
2098 buff->cur = result + len;
2102 /* Say which field of TOK is in use. */
2104 enum cpp_token_fld_kind
2105 cpp_token_val_index (cpp_token *tok)
2107 switch (TOKEN_SPELL (tok))
2110 return CPP_TOKEN_FLD_NODE;
2112 return CPP_TOKEN_FLD_STR;
2114 if (tok->type == CPP_MACRO_ARG)
2115 return CPP_TOKEN_FLD_ARG_NO;
2116 else if (tok->type == CPP_PADDING)
2117 return CPP_TOKEN_FLD_SOURCE;
2118 else if (tok->type == CPP_PRAGMA)
2119 return CPP_TOKEN_FLD_PRAGMA;
2120 /* else fall through */
2122 return CPP_TOKEN_FLD_NONE;