1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
43 /* MULTIBYTE_CHARS support only works for native compilers.
44 ??? Ideally what we want is to model widechar support after
45 the current floating point support. */
47 #undef MULTIBYTE_CHARS
50 #ifdef MULTIBYTE_CHARS
55 /* Tokens with SPELL_STRING store their spelling in the token list,
56 and its length in token->val.str.len. */
68 enum spell_type category;
69 const unsigned char *name;
72 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
75 #define OP(e, s) { SPELL_OPERATOR, U s },
76 #define TK(e, s) { s, U STRINGX (e) },
77 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
81 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
82 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
84 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
86 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
88 static int skip_asm_block PARAMS ((cpp_reader *, int));
89 static int skip_block_comment PARAMS ((cpp_reader *));
90 static int skip_line_comment PARAMS ((cpp_reader *));
91 static void adjust_column PARAMS ((cpp_reader *));
92 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
93 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static unsigned int copy_text_chars (char *, const char *, unsigned int, int);
100 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *, int));
101 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
102 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
103 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
104 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
105 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
106 const unsigned char *, unsigned int *));
108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
110 static unsigned int hex_digit_value PARAMS ((unsigned int));
114 Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
118 cpp_ideq (token, string)
119 const cpp_token *token;
122 if (token->type != CPP_NAME)
125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
128 /* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
131 handle_newline (buffer, newline_char)
133 cppchar_t newline_char;
135 cppchar_t next = EOF;
137 buffer->col_adjust = 0;
139 buffer->line_base = buffer->cur;
141 /* Handle CR-LF and LF-CR combinations, get the next character. */
142 if (buffer->cur < buffer->rlimit)
144 next = *buffer->cur++;
145 if (next + newline_char == '\r' + '\n')
147 buffer->line_base = buffer->cur;
148 if (buffer->cur < buffer->rlimit)
149 next = *buffer->cur++;
155 buffer->read_ahead = next;
159 /* Subroutine of skip_escaped_newlines; called when a trigraph is
160 encountered. It warns if necessary, and returns true if the
161 trigraph should be honoured. FROM_CHAR is the third character of a
162 trigraph, and presumed to be the previous character for position
165 trigraph_ok (pfile, from_char)
169 int accept = CPP_OPTION (pfile, trigraphs);
171 /* Don't warn about trigraphs in comments. */
172 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
174 cpp_buffer *buffer = pfile->buffer;
176 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
177 "trigraph ??%c converted to %c",
179 (int) _cpp_trigraph_map[from_char]);
180 else if (buffer->cur != buffer->last_Wtrigraphs)
182 buffer->last_Wtrigraphs = buffer->cur;
183 cpp_warning_with_line (pfile, buffer->lineno,
184 CPP_BUF_COL (buffer) - 2,
185 "trigraph ??%c ignored", (int) from_char);
192 /* Assumes local variables buffer and result. */
193 #define ACCEPT_CHAR(t) \
194 do { result->type = t; buffer->read_ahead = EOF; } while (0)
196 /* When we move to multibyte character sets, add to these something
197 that saves and restores the state of the multibyte conversion
198 library. This probably involves saving and restoring a "cookie".
199 In the case of glibc it is an 8-byte structure, so is not a high
200 overhead operation. In any case, it's out of the fast path. */
201 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
202 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
204 /* Skips any escaped newlines introduced by NEXT, which is either a
205 '?' or a '\\'. Returns the next character, which will also have
206 been placed in buffer->read_ahead. This routine performs
207 preprocessing stages 1 and 2 of the ISO C standard. */
209 skip_escaped_newlines (buffer, next)
213 /* Only do this if we apply stages 1 and 2. */
214 if (!buffer->from_stage3)
217 const unsigned char *saved_cur;
222 if (buffer->cur == buffer->rlimit)
228 next1 = *buffer->cur++;
229 if (next1 != '?' || buffer->cur == buffer->rlimit)
235 next1 = *buffer->cur++;
236 if (!_cpp_trigraph_map[next1]
237 || !trigraph_ok (buffer->pfile, next1))
243 /* We have a full trigraph here. */
244 next = _cpp_trigraph_map[next1];
245 if (next != '\\' || buffer->cur == buffer->rlimit)
250 /* We have a backslash, and room for at least one more character. */
254 next1 = *buffer->cur++;
255 if (!is_nvspace (next1))
259 while (buffer->cur < buffer->rlimit);
261 if (!is_vspace (next1))
267 if (space && !buffer->pfile->state.lexing_comment)
268 cpp_warning (buffer->pfile,
269 "backslash and newline separated by space");
271 next = handle_newline (buffer, next1);
273 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
275 while (next == '\\' || next == '?');
278 buffer->read_ahead = next;
282 /* Obtain the next character, after trigraph conversion and skipping
283 an arbitrary string of escaped newlines. The common case of no
284 trigraphs or escaped newlines falls through quickly. */
286 get_effective_char (buffer)
289 cppchar_t next = EOF;
291 if (buffer->cur < buffer->rlimit)
293 next = *buffer->cur++;
295 /* '?' can introduce trigraphs (and therefore backslash); '\\'
296 can introduce escaped newlines, which we want to skip, or
297 UCNs, which, depending upon lexer state, we will handle in
299 if (next == '?' || next == '\\')
300 next = skip_escaped_newlines (buffer, next);
303 buffer->read_ahead = next;
307 /* SDCC _asm specific */
308 /* Skip an _asm ... _endasm block. We find the end of the block by
309 seeing _endasm. Returns non-zero if _asm terminated by EOF, zero
312 skip_asm_block (pfile, read_ahead)
316 #define _ENDASM_STR "endasm"
317 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
319 cpp_buffer *buffer = pfile->buffer;
324 pfile->state.lexing_comment = 1;
325 while (buffer->cur != buffer->rlimit)
327 if (read_ahead != EOF)
330 c = buffer->read_ahead;
335 prev_space = is_space(c);
340 /* FIXME: For speed, create a new character class of characters
341 of interest inside block comments. */
342 if (c == '?' || c == '\\')
343 c = skip_escaped_newlines (buffer, c);
345 if (prev_space && c == '_')
347 if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
348 strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
350 buffer->cur += _ENDASM_LEN;
355 else if (is_vspace (c))
357 prev_space = is_space(c), c = handle_newline (buffer, c);
361 adjust_column (pfile);
364 pfile->state.lexing_comment = 0;
365 buffer->read_ahead = EOF;
369 /* Skip a C-style block comment. We find the end of the comment by
370 seeing if an asterisk is before every '/' we encounter. Returns
371 non-zero if comment terminated by EOF, zero otherwise. */
373 skip_block_comment (pfile)
376 cpp_buffer *buffer = pfile->buffer;
377 cppchar_t c = EOF, prevc = EOF;
379 pfile->state.lexing_comment = 1;
380 while (buffer->cur != buffer->rlimit)
382 prevc = c, c = *buffer->cur++;
385 /* FIXME: For speed, create a new character class of characters
386 of interest inside block comments. */
387 if (c == '?' || c == '\\')
388 c = skip_escaped_newlines (buffer, c);
390 /* People like decorating comments with '*', so check for '/'
391 instead for efficiency. */
397 /* Warn about potential nested comments, but not if the '/'
398 comes immediately before the true comment delimiter.
399 Don't bother to get it right across escaped newlines. */
400 if (CPP_OPTION (pfile, warn_comments)
401 && buffer->cur != buffer->rlimit)
403 prevc = c, c = *buffer->cur++;
404 if (c == '*' && buffer->cur != buffer->rlimit)
406 prevc = c, c = *buffer->cur++;
408 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
409 CPP_BUF_COL (buffer),
410 "\"/*\" within comment");
415 else if (is_vspace (c))
417 prevc = c, c = handle_newline (buffer, c);
421 adjust_column (pfile);
424 pfile->state.lexing_comment = 0;
425 buffer->read_ahead = EOF;
426 return c != '/' || prevc != '*';
429 /* Skip a C++ line comment. Handles escaped newlines. Returns
430 non-zero if a multiline comment. The following new line, if any,
431 is left in buffer->read_ahead. */
433 skip_line_comment (pfile)
436 cpp_buffer *buffer = pfile->buffer;
437 unsigned int orig_lineno = buffer->lineno;
440 pfile->state.lexing_comment = 1;
444 if (buffer->cur == buffer->rlimit)
448 if (c == '?' || c == '\\')
449 c = skip_escaped_newlines (buffer, c);
451 while (!is_vspace (c));
453 pfile->state.lexing_comment = 0;
454 buffer->read_ahead = c; /* Leave any newline for caller. */
455 return orig_lineno != buffer->lineno;
458 /* pfile->buffer->cur is one beyond the \t character. Update
459 col_adjust so we track the column correctly. */
461 adjust_column (pfile)
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
467 /* Round it up to multiple of the tabstop, but subtract 1 since the
468 tab itself occupies a character position. */
469 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
470 - col % CPP_OPTION (pfile, tabstop)) - 1;
473 /* Skips whitespace, saving the next non-whitespace character.
474 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
475 tokens might be assigned an incorrect column. */
477 skip_whitespace (pfile, c)
481 cpp_buffer *buffer = pfile->buffer;
482 unsigned int warned = 0;
486 /* Horizontal space always OK. */
490 adjust_column (pfile);
491 /* Just \f \v or \0 left. */
496 cpp_warning (pfile, "null character(s) ignored");
500 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
501 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
502 CPP_BUF_COL (buffer),
503 "%s in preprocessing directive",
504 c == '\f' ? "form feed" : "vertical tab");
507 if (buffer->cur == buffer->rlimit)
511 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
512 while (is_nvspace (c));
514 /* Remember the next character. */
515 buffer->read_ahead = c;
518 /* See if the characters of a number token are valid in a name (no
521 name_p (pfile, string)
523 const cpp_string *string;
527 for (i = 0; i < string->len; i++)
528 if (!is_idchar (string->text[i]))
534 /* Parse an identifier, skipping embedded backslash-newlines.
535 Calculate the hash value of the token while parsing, for improved
536 performance. The hashing algorithm *must* match cpp_lookup(). */
538 static cpp_hashnode *
539 parse_identifier (pfile, c)
543 cpp_hashnode *result;
544 cpp_buffer *buffer = pfile->buffer;
545 unsigned int saw_dollar = 0, len;
546 struct obstack *stack = &pfile->hash_table->stack;
552 obstack_1grow (stack, c);
558 if (buffer->cur == buffer->rlimit)
563 while (is_idchar (c));
565 /* Potential escaped newline? */
566 if (c != '?' && c != '\\')
568 c = skip_escaped_newlines (buffer, c);
570 while (is_idchar (c));
572 /* Remember the next character. */
573 buffer->read_ahead = c;
575 /* $ is not an identifier character in the standard, but is commonly
576 accepted as an extension. Don't warn about it in skipped
577 conditional blocks. */
578 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
579 cpp_pedwarn (pfile, "'$' character(s) in identifier");
581 /* Identifiers are null-terminated. */
582 len = obstack_object_size (stack);
583 obstack_1grow (stack, '\0');
585 /* This routine commits the memory if necessary. */
586 result = (cpp_hashnode *)
587 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
589 /* Some identifiers require diagnostics when lexed. */
590 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
592 /* It is allowed to poison the same identifier twice. */
593 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
594 cpp_error (pfile, "attempt to use poisoned \"%s\"",
597 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
598 replacement list of a variadic macro. */
599 if (result == pfile->spec_nodes.n__VA_ARGS__
600 && !pfile->state.va_args_ok)
601 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
607 /* Parse a number, skipping embedded backslash-newlines. */
609 parse_number (pfile, number, c, leading_period)
615 cpp_buffer *buffer = pfile->buffer;
616 cpp_pool *pool = &pfile->ident_pool;
617 unsigned char *dest, *limit;
619 dest = POOL_FRONT (pool);
620 limit = POOL_LIMIT (pool);
622 /* Place a leading period. */
626 limit = _cpp_next_chunk (pool, 0, &dest);
634 /* Need room for terminating null. */
635 if (dest + 1 >= limit)
636 limit = _cpp_next_chunk (pool, 0, &dest);
640 if (buffer->cur == buffer->rlimit)
645 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
647 /* Potential escaped newline? */
648 if (c != '?' && c != '\\')
650 c = skip_escaped_newlines (buffer, c);
652 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
654 /* Remember the next character. */
655 buffer->read_ahead = c;
657 /* Null-terminate the number. */
660 number->text = POOL_FRONT (pool);
661 number->len = dest - number->text;
662 POOL_COMMIT (pool, number->len + 1);
665 /* Subroutine of parse_string. Emits error for unterminated strings. */
667 unterminated (pfile, term)
671 cpp_error (pfile, "missing terminating %c character", term);
673 if (term == '\"' && pfile->mlstring_pos.line
674 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
676 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
677 pfile->mlstring_pos.col,
678 "possible start of unterminated string literal");
679 pfile->mlstring_pos.line = 0;
683 /* Subroutine of parse_string. */
685 unescaped_terminator_p (pfile, dest)
687 const unsigned char *dest;
689 const unsigned char *start, *temp;
691 /* In #include-style directives, terminators are not escapable. */
692 if (pfile->state.angled_headers)
695 start = POOL_FRONT (&pfile->ident_pool);
697 /* An odd number of consecutive backslashes represents an escaped
699 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
702 return ((dest - temp) & 1) == 0;
705 /* Parses a string, character constant, or angle-bracketed header file
706 name. Handles embedded trigraphs and escaped newlines. The stored
707 string is guaranteed NUL-terminated, but it is not guaranteed that
708 this is the first NUL since embedded NULs are preserved.
710 Multi-line strings are allowed, but they are deprecated. */
712 parse_string (pfile, token, terminator)
715 cppchar_t terminator;
717 cpp_buffer *buffer = pfile->buffer;
718 cpp_pool *pool = &pfile->ident_pool;
719 unsigned char *dest, *limit;
721 unsigned int nulls = 0;
723 dest = POOL_FRONT (pool);
724 limit = POOL_LIMIT (pool);
728 if (buffer->cur == buffer->rlimit)
734 /* We need space for the terminating NUL. */
736 limit = _cpp_next_chunk (pool, 0, &dest);
740 unterminated (pfile, terminator);
744 /* Handle trigraphs, escaped newlines etc. */
745 if (c == '?' || c == '\\')
746 c = skip_escaped_newlines (buffer, c);
748 if (c == terminator && unescaped_terminator_p (pfile, dest))
753 else if (is_vspace (c))
755 /* In assembly language, silently terminate string and
756 character literals at end of line. This is a kludge
757 around not knowing where comments are. */
758 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
761 /* Character constants and header names may not extend over
762 multiple lines. In Standard C, neither may strings.
763 Unfortunately, we accept multiline strings as an
764 extension, except in #include family directives. */
765 if (terminator != '"' || pfile->state.angled_headers)
767 unterminated (pfile, terminator);
771 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
772 if (pfile->mlstring_pos.line == 0)
773 pfile->mlstring_pos = pfile->lexer_pos;
775 c = handle_newline (buffer, c);
782 cpp_warning (pfile, "null character(s) preserved in literal");
788 /* Remember the next character. */
789 buffer->read_ahead = c;
792 token->val.str.text = POOL_FRONT (pool);
793 token->val.str.len = dest - token->val.str.text;
794 POOL_COMMIT (pool, token->val.str.len + 1);
797 /* Count and copy characters from src to dest, excluding CRs:
798 CRs are automatically generated, because the output is
799 opened in TEXT mode. If dest == NULL, only count chars */
801 copy_text_chars (dest, src, len, read_ahead)
810 if (read_ahead != EOF && read_ahead != '\r')
813 *dest++ = read_ahead;
817 for (p = src; p != src + len; ++p)
832 /* SDCC _asm specific */
833 /* The stored _asm block includes the initial '_asm' and any terminator. */
835 save_asm (pfile, token, from, read_ahead)
838 const unsigned char *from;
841 #define _ASM_STR "_asm"
842 #define _ASM_LEN ((sizeof _ASM_STR) - 1)
844 unsigned char *buffer;
845 unsigned int text_len, len;
847 /* ignore read_ahead if it is a CR */
848 if (read_ahead == '\r')
850 len = pfile->buffer->cur - from;
851 /* + _ASM_LEN for the initial '_asm'. */
852 text_len = copy_text_chars (NULL, from, len, read_ahead) + _ASM_LEN;
853 buffer = _cpp_pool_alloc (&pfile->ident_pool, text_len);
855 token->type = CPP_ASM;
856 token->val.str.len = text_len;
857 token->val.str.text = buffer;
859 memcpy (buffer, _ASM_STR, _ASM_LEN);
860 copy_text_chars (buffer + _ASM_LEN, from, len, read_ahead);
863 /* The stored comment includes the comment start and any terminator. */
865 save_comment (pfile, token, from)
868 const unsigned char *from;
870 unsigned char *buffer;
871 unsigned int text_len, len;
873 len = pfile->buffer->cur - from;
874 /* C++ comments probably (not definitely) have moved past a new
875 line, which we don't want to save in the comment. */
876 if (pfile->buffer->read_ahead != EOF)
878 /* + 1 for the initial '/'. */
879 text_len = copy_text_chars (NULL, from, len, EOF) + 1;
880 buffer = _cpp_pool_alloc (&pfile->ident_pool, text_len);
882 token->type = CPP_COMMENT;
883 token->val.str.len = text_len;
884 token->val.str.text = buffer;
887 copy_text_chars (buffer + 1, from, len, EOF);
890 /* Subroutine of lex_token to handle '%'. A little tricky, since we
891 want to avoid stepping back when lexing %:%X. */
893 lex_percent (buffer, result)
899 result->type = CPP_MOD;
900 /* Parsing %:%X could leave an extra character. */
901 if (buffer->extra_char == EOF)
902 c = get_effective_char (buffer);
905 c = buffer->read_ahead = buffer->extra_char;
906 buffer->extra_char = EOF;
910 ACCEPT_CHAR (CPP_MOD_EQ);
911 else if (CPP_OPTION (buffer->pfile, digraphs))
915 result->flags |= DIGRAPH;
916 ACCEPT_CHAR (CPP_HASH);
917 if (get_effective_char (buffer) == '%')
919 buffer->extra_char = get_effective_char (buffer);
920 if (buffer->extra_char == ':')
922 buffer->extra_char = EOF;
923 ACCEPT_CHAR (CPP_PASTE);
926 /* We'll catch the extra_char when we're called back. */
927 buffer->read_ahead = '%';
932 result->flags |= DIGRAPH;
933 ACCEPT_CHAR (CPP_CLOSE_BRACE);
938 /* Subroutine of lex_token to handle '.'. This is tricky, since we
939 want to avoid stepping back when lexing '...' or '.123'. In the
940 latter case we should also set a flag for parse_number. */
942 lex_dot (pfile, result)
946 cpp_buffer *buffer = pfile->buffer;
949 /* Parsing ..X could leave an extra character. */
950 if (buffer->extra_char == EOF)
951 c = get_effective_char (buffer);
954 c = buffer->read_ahead = buffer->extra_char;
955 buffer->extra_char = EOF;
958 /* All known character sets have 0...9 contiguous. */
959 if (c >= '0' && c <= '9')
961 result->type = CPP_NUMBER;
962 parse_number (pfile, &result->val.str, c, 1);
966 result->type = CPP_DOT;
969 buffer->extra_char = get_effective_char (buffer);
970 if (buffer->extra_char == '.')
972 buffer->extra_char = EOF;
973 ACCEPT_CHAR (CPP_ELLIPSIS);
976 /* We'll catch the extra_char when we're called back. */
977 buffer->read_ahead = '.';
979 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
980 ACCEPT_CHAR (CPP_DOT_STAR);
985 _cpp_lex_token (pfile, result)
991 const unsigned char *comment_start;
995 bol = pfile->state.next_bol;
997 buffer = pfile->buffer;
998 pfile->state.next_bol = 0;
999 result->flags = buffer->saved_flags;
1000 buffer->saved_flags = 0;
1002 pfile->lexer_pos.line = buffer->lineno;
1004 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
1006 c = buffer->read_ahead;
1007 if (c == EOF && buffer->cur < buffer->rlimit)
1010 pfile->lexer_pos.col++;
1014 buffer->read_ahead = EOF;
1018 /* Non-empty files should end in a newline. Checking "bol" too
1019 prevents multiple warnings when hitting the EOF more than
1020 once, like in a directive. Don't warn for command line and
1022 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
1023 cpp_pedwarn (pfile, "no newline at end of file");
1024 pfile->state.next_bol = 1;
1025 pfile->skipping = 0; /* In case missing #endif. */
1026 result->type = CPP_EOF;
1027 /* Don't do MI optimisation. */
1030 case ' ': case '\t': case '\f': case '\v': case '\0':
1031 skip_whitespace (pfile, c);
1032 result->flags |= PREV_WHITE;
1035 case '\n': case '\r':
1036 if (!pfile->state.in_directive)
1038 handle_newline (buffer, c);
1040 pfile->lexer_pos.output_line = buffer->lineno;
1041 /* This is a new line, so clear any white space flag.
1042 Newlines in arguments are white space (6.10.3.10);
1043 parse_arg takes care of that. */
1044 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1048 /* Don't let directives spill over to the next line. */
1049 buffer->read_ahead = c;
1050 pfile->state.next_bol = 1;
1051 result->type = CPP_EOF;
1052 /* Don't break; pfile->skipping might be true. */
1057 /* These could start an escaped newline, or '?' a trigraph. Let
1058 skip_escaped_newlines do all the work. */
1060 unsigned int lineno = buffer->lineno;
1062 c = skip_escaped_newlines (buffer, c);
1063 if (lineno != buffer->lineno)
1064 /* We had at least one escaped newline of some sort, and the
1065 next character is in buffer->read_ahead. Update the
1066 token's line and column. */
1069 /* We are either the original '?' or '\\', or a trigraph. */
1070 result->type = CPP_QUERY;
1071 buffer->read_ahead = EOF;
1079 case '0': case '1': case '2': case '3': case '4':
1080 case '5': case '6': case '7': case '8': case '9':
1081 result->type = CPP_NUMBER;
1082 parse_number (pfile, &result->val.str, c, 0);
1086 if (!CPP_OPTION (pfile, dollars_in_ident))
1088 /* Fall through... */
1091 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1092 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1093 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1094 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1096 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1097 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1098 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1099 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1101 result->type = CPP_NAME;
1102 result->val.node = parse_identifier (pfile, c);
1104 /* 'L' may introduce wide characters or strings. */
1105 if (result->val.node == pfile->spec_nodes.n_L)
1107 c = buffer->read_ahead; /* For make_string. */
1108 if (c == '\'' || c == '"')
1110 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1114 /* SDCC _asm specific */
1115 /* handle _asm ... _endasm ; */
1116 else if (result->val.node == pfile->spec_nodes.n__asm)
1118 int read_ahead = buffer->read_ahead;
1120 comment_start = buffer->cur;
1121 result->type = CPP_ASM;
1122 skip_asm_block (pfile, read_ahead);
1123 /* Save the _asm block as a token in its own right. */
1124 save_asm (pfile, result, comment_start, read_ahead);
1126 /* Convert named operators to their proper types. */
1127 else if (result->val.node->flags & NODE_OPERATOR)
1129 result->flags |= NAMED_OP;
1130 result->type = result->val.node->value.operator;
1136 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1138 parse_string (pfile, result, c);
1142 /* A potential block or line comment. */
1143 comment_start = buffer->cur;
1144 result->type = CPP_DIV;
1145 c = get_effective_char (buffer);
1147 ACCEPT_CHAR (CPP_DIV_EQ);
1148 if (c != '/' && c != '*')
1153 if (skip_block_comment (pfile))
1154 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1155 pfile->lexer_pos.col,
1156 "unterminated comment");
1160 if (!CPP_OPTION (pfile, cplusplus_comments)
1161 && !CPP_IN_SYSTEM_HEADER (pfile))
1164 /* Warn about comments only if pedantically GNUC89, and not
1165 in system headers. */
1166 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1167 && ! buffer->warned_cplusplus_comments)
1170 "C++ style comments are not allowed in ISO C89");
1172 "(this will be reported only once per input file)");
1173 buffer->warned_cplusplus_comments = 1;
1176 /* Skip_line_comment updates buffer->read_ahead. */
1177 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1178 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1179 pfile->lexer_pos.col,
1180 "multi-line comment");
1183 /* Skipping the comment has updated buffer->read_ahead. */
1184 if (!pfile->state.save_comments)
1186 result->flags |= PREV_WHITE;
1190 /* Save the comment as a token in its own right. */
1191 save_comment (pfile, result, comment_start);
1193 when executed with -C option, comments
1194 were included even if they were in a skipped #if block.
1195 Applied solution from GCC cpp 3.3.2 */
1199 if (pfile->state.angled_headers)
1201 result->type = CPP_HEADER_NAME;
1202 c = '>'; /* terminator. */
1206 result->type = CPP_LESS;
1207 c = get_effective_char (buffer);
1209 ACCEPT_CHAR (CPP_LESS_EQ);
1212 ACCEPT_CHAR (CPP_LSHIFT);
1213 if (get_effective_char (buffer) == '=')
1214 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1216 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1218 ACCEPT_CHAR (CPP_MIN);
1219 if (get_effective_char (buffer) == '=')
1220 ACCEPT_CHAR (CPP_MIN_EQ);
1222 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1224 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1225 result->flags |= DIGRAPH;
1227 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1229 ACCEPT_CHAR (CPP_OPEN_BRACE);
1230 result->flags |= DIGRAPH;
1235 result->type = CPP_GREATER;
1236 c = get_effective_char (buffer);
1238 ACCEPT_CHAR (CPP_GREATER_EQ);
1241 ACCEPT_CHAR (CPP_RSHIFT);
1242 if (get_effective_char (buffer) == '=')
1243 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1245 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1247 ACCEPT_CHAR (CPP_MAX);
1248 if (get_effective_char (buffer) == '=')
1249 ACCEPT_CHAR (CPP_MAX_EQ);
1254 lex_percent (buffer, result);
1255 if (result->type == CPP_HASH)
1260 lex_dot (pfile, result);
1264 result->type = CPP_PLUS;
1265 c = get_effective_char (buffer);
1267 ACCEPT_CHAR (CPP_PLUS_EQ);
1269 ACCEPT_CHAR (CPP_PLUS_PLUS);
1273 result->type = CPP_MINUS;
1274 c = get_effective_char (buffer);
1277 ACCEPT_CHAR (CPP_DEREF);
1278 if (CPP_OPTION (pfile, cplusplus)
1279 && get_effective_char (buffer) == '*')
1280 ACCEPT_CHAR (CPP_DEREF_STAR);
1283 ACCEPT_CHAR (CPP_MINUS_EQ);
1285 ACCEPT_CHAR (CPP_MINUS_MINUS);
1289 result->type = CPP_MULT;
1290 if (get_effective_char (buffer) == '=')
1291 ACCEPT_CHAR (CPP_MULT_EQ);
1295 result->type = CPP_EQ;
1296 if (get_effective_char (buffer) == '=')
1297 ACCEPT_CHAR (CPP_EQ_EQ);
1301 result->type = CPP_NOT;
1302 if (get_effective_char (buffer) == '=')
1303 ACCEPT_CHAR (CPP_NOT_EQ);
1307 result->type = CPP_AND;
1308 c = get_effective_char (buffer);
1310 ACCEPT_CHAR (CPP_AND_EQ);
1312 ACCEPT_CHAR (CPP_AND_AND);
1316 c = buffer->extra_char; /* Can be set by error condition below. */
1319 buffer->read_ahead = c;
1320 buffer->extra_char = EOF;
1323 c = get_effective_char (buffer);
1327 ACCEPT_CHAR (CPP_PASTE);
1331 result->type = CPP_HASH;
1335 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1336 tokens within the list of arguments that would otherwise act
1337 as preprocessing directives, the behavior is undefined.
1339 This implementation will report a hard error, terminate the
1340 macro invocation, and proceed to process the directive. */
1341 if (pfile->state.parsing_args)
1343 if (pfile->state.parsing_args == 2)
1345 "directives may not be used inside a macro argument");
1347 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1348 buffer->extra_char = buffer->read_ahead;
1349 buffer->read_ahead = '#';
1350 pfile->state.next_bol = 1;
1351 result->type = CPP_EOF;
1353 /* Get whitespace right - newline_in_args sets it. */
1354 if (pfile->lexer_pos.col == 1)
1355 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1359 /* This is the hash introducing a directive. */
1360 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1361 goto done_directive; /* bol still 1. */
1362 /* This is in fact an assembler #. */
1367 result->type = CPP_OR;
1368 c = get_effective_char (buffer);
1370 ACCEPT_CHAR (CPP_OR_EQ);
1372 ACCEPT_CHAR (CPP_OR_OR);
1376 result->type = CPP_XOR;
1377 if (get_effective_char (buffer) == '=')
1378 ACCEPT_CHAR (CPP_XOR_EQ);
1382 result->type = CPP_COLON;
1383 c = get_effective_char (buffer);
1384 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1385 ACCEPT_CHAR (CPP_SCOPE);
1386 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1388 result->flags |= DIGRAPH;
1389 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1393 case '~': result->type = CPP_COMPL; break;
1394 case ',': result->type = CPP_COMMA; break;
1395 case '(': result->type = CPP_OPEN_PAREN; break;
1396 case ')': result->type = CPP_CLOSE_PAREN; break;
1397 case '[': result->type = CPP_OPEN_SQUARE; break;
1398 case ']': result->type = CPP_CLOSE_SQUARE; break;
1399 case '{': result->type = CPP_OPEN_BRACE; break;
1400 case '}': result->type = CPP_CLOSE_BRACE; break;
1401 case ';': result->type = CPP_SEMICOLON; break;
1403 /* @ is a punctuator in Objective C. */
1404 case '@': result->type = CPP_ATSIGN; break;
1408 result->type = CPP_OTHER;
1413 if (pfile->skipping)
1416 /* If not in a directive, this token invalidates controlling macros. */
1417 if (!pfile->state.in_directive)
1418 pfile->mi_state = MI_FAILED;
1421 /* An upper bound on the number of bytes needed to spell a token,
1422 including preceding whitespace. */
1424 cpp_token_len (token)
1425 const cpp_token *token;
1429 switch (TOKEN_SPELL (token))
1431 default: len = 0; break;
1432 case SPELL_STRING: len = token->val.str.len; break;
1433 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1435 /* 1 for whitespace, 4 for comment delimiters. */
1439 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1440 already contain enough space to hold the token's spelling.
1441 Returns a pointer to the character after the last character
1444 cpp_spell_token (pfile, token, buffer)
1445 cpp_reader *pfile; /* Would be nice to be rid of this... */
1446 const cpp_token *token;
1447 unsigned char *buffer;
1449 switch (TOKEN_SPELL (token))
1451 case SPELL_OPERATOR:
1453 const unsigned char *spelling;
1456 if (token->flags & DIGRAPH)
1458 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1459 else if (token->flags & NAMED_OP)
1462 spelling = TOKEN_NAME (token);
1464 while ((c = *spelling++) != '\0')
1471 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1472 buffer += NODE_LEN (token->val.node);
1477 int left, right, tag;
1478 switch (token->type)
1480 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1481 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1482 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1483 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1484 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1485 default: left = '\0'; right = '\0'; tag = '\0'; break;
1487 if (tag) *buffer++ = tag;
1488 if (left) *buffer++ = left;
1489 memcpy (buffer, token->val.str.text, token->val.str.len);
1490 buffer += token->val.str.len;
1491 if (right) *buffer++ = right;
1496 *buffer++ = token->val.c;
1500 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1507 /* Returns a token as a null-terminated string. The string is
1508 temporary, and automatically freed later. Useful for diagnostics. */
1510 cpp_token_as_text (pfile, token)
1512 const cpp_token *token;
1514 unsigned int len = cpp_token_len (token);
1515 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1517 end = cpp_spell_token (pfile, token, start);
1523 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1525 cpp_type2name (type)
1526 enum cpp_ttype type;
1528 return (const char *) token_spellings[type].name;
1531 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1532 for efficiency - to avoid double-buffering. Also, outputs a space
1533 if PREV_WHITE is flagged. */
1535 cpp_output_token (token, fp)
1536 const cpp_token *token;
1539 if (token->flags & PREV_WHITE)
1542 switch (TOKEN_SPELL (token))
1544 case SPELL_OPERATOR:
1546 const unsigned char *spelling;
1548 if (token->flags & DIGRAPH)
1550 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1551 else if (token->flags & NAMED_OP)
1554 spelling = TOKEN_NAME (token);
1556 ufputs (spelling, fp);
1562 ufputs (NODE_NAME (token->val.node), fp);
1567 int left, right, tag;
1568 switch (token->type)
1570 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1571 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1572 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1573 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1574 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1575 default: left = '\0'; right = '\0'; tag = '\0'; break;
1577 if (tag) putc (tag, fp);
1578 if (left) putc (left, fp);
1579 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1580 if (right) putc (right, fp);
1585 putc (token->val.c, fp);
1589 /* An error, most probably. */
1594 /* Compare two tokens. */
1596 _cpp_equiv_tokens (a, b)
1597 const cpp_token *a, *b;
1599 if (a->type == b->type && a->flags == b->flags)
1600 switch (TOKEN_SPELL (a))
1602 default: /* Keep compiler happy. */
1603 case SPELL_OPERATOR:
1606 return a->val.c == b->val.c; /* Character. */
1608 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1610 return a->val.node == b->val.node;
1612 return (a->val.str.len == b->val.str.len
1613 && !memcmp (a->val.str.text, b->val.str.text,
1620 /* Determine whether two tokens can be pasted together, and if so,
1621 what the resulting token is. Returns CPP_EOF if the tokens cannot
1622 be pasted, or the appropriate type for the merged token if they
1625 cpp_can_paste (pfile, token1, token2, digraph)
1627 const cpp_token *token1, *token2;
1630 enum cpp_ttype a = token1->type, b = token2->type;
1631 int cxx = CPP_OPTION (pfile, cplusplus);
1633 /* Treat named operators as if they were ordinary NAMEs. */
1634 if (token1->flags & NAMED_OP)
1636 if (token2->flags & NAMED_OP)
1639 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1640 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1645 if (b == a) return CPP_RSHIFT;
1646 if (b == CPP_QUERY && cxx) return CPP_MAX;
1647 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1650 if (b == a) return CPP_LSHIFT;
1651 if (b == CPP_QUERY && cxx) return CPP_MIN;
1652 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1653 if (CPP_OPTION (pfile, digraphs))
1656 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1658 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1662 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1663 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1664 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1667 if (b == a) return CPP_MINUS_MINUS;
1668 if (b == CPP_GREATER) return CPP_DEREF;
1671 if (b == a && cxx) return CPP_SCOPE;
1672 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1673 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1677 if (CPP_OPTION (pfile, digraphs))
1679 if (b == CPP_GREATER)
1680 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1682 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1686 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1689 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1690 if (b == CPP_NUMBER) return CPP_NUMBER;
1694 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1696 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1700 if (b == CPP_NAME) return CPP_NAME;
1702 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1704 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1706 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1710 if (b == CPP_NUMBER) return CPP_NUMBER;
1711 if (b == CPP_NAME) return CPP_NUMBER;
1712 if (b == CPP_DOT) return CPP_NUMBER;
1713 /* Numbers cannot have length zero, so this is safe. */
1714 if ((b == CPP_PLUS || b == CPP_MINUS)
1715 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1726 /* Returns nonzero if a space should be inserted to avoid an
1727 accidental token paste for output. For simplicity, it is
1728 conservative, and occasionally advises a space where one is not
1729 needed, e.g. "." and ".2". */
1732 cpp_avoid_paste (pfile, token1, token2)
1734 const cpp_token *token1, *token2;
1736 enum cpp_ttype a = token1->type, b = token2->type;
1739 if (token1->flags & NAMED_OP)
1741 if (token2->flags & NAMED_OP)
1745 if (token2->flags & DIGRAPH)
1746 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1747 else if (token_spellings[b].category == SPELL_OPERATOR)
1748 c = token_spellings[b].name[0];
1750 /* Quickly get everything that can paste with an '='. */
1751 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1756 case CPP_GREATER: return c == '>' || c == '?';
1757 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1758 case CPP_PLUS: return c == '+';
1759 case CPP_MINUS: return c == '-' || c == '>';
1760 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1761 case CPP_MOD: return c == ':' || c == '>';
1762 case CPP_AND: return c == '&';
1763 case CPP_OR: return c == '|';
1764 case CPP_COLON: return c == ':' || c == '>';
1765 case CPP_DEREF: return c == '*';
1766 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1767 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1768 case CPP_NAME: return ((b == CPP_NUMBER
1769 && name_p (pfile, &token2->val.str))
1771 || b == CPP_CHAR || b == CPP_STRING); /* L */
1772 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1773 || c == '.' || c == '+' || c == '-');
1774 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1775 && token1->val.c == '@'
1776 && (b == CPP_NAME || b == CPP_STRING));
1783 /* Output all the remaining tokens on the current line, and a newline
1784 character, to FP. Leading whitespace is removed. */
1786 cpp_output_line (pfile, fp)
1792 cpp_get_token (pfile, &token);
1793 token.flags &= ~PREV_WHITE;
1794 while (token.type != CPP_EOF)
1796 cpp_output_token (&token, fp);
1797 cpp_get_token (pfile, &token);
1803 /* Returns the value of a hexadecimal digit. */
1808 if (c >= 'a' && c <= 'f')
1809 return c - 'a' + 10;
1810 if (c >= 'A' && c <= 'F')
1811 return c - 'A' + 10;
1812 if (c >= '0' && c <= '9')
1817 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1818 failure if cpplib is not parsing C++ or C99. Such failure is
1819 silent, and no variables are updated. Otherwise returns 0, and
1820 warns if -Wtraditional.
1822 [lex.charset]: The character designated by the universal character
1823 name \UNNNNNNNN is that character whose character short name in
1824 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1825 universal character name \uNNNN is that character whose character
1826 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1827 for a universal character name is less than 0x20 or in the range
1828 0x7F-0x9F (inclusive), or if the universal character name
1829 designates a character in the basic source character set, then the
1830 program is ill-formed.
1832 We assume that wchar_t is Unicode, so we don't need to do any
1833 mapping. Is this ever wrong?
1835 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1836 LIMIT is the end of the string or charconst. PSTR is updated to
1837 point after the UCS on return, and the UCS is written into PC. */
1840 maybe_read_ucs (pfile, pstr, limit, pc)
1842 const unsigned char **pstr;
1843 const unsigned char *limit;
1846 const unsigned char *p = *pstr;
1847 unsigned int code = 0;
1848 unsigned int c = *pc, length;
1850 /* Only attempt to interpret a UCS for C++ and C99. */
1851 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1854 if (CPP_WTRADITIONAL (pfile))
1855 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1857 length = (c == 'u' ? 4: 8);
1859 if ((size_t) (limit - p) < length)
1861 cpp_error (pfile, "incomplete universal-character-name");
1862 /* Skip to the end to avoid more diagnostics. */
1867 for (; length; length--, p++)
1871 code = (code << 4) + hex_digit_value (c);
1875 "non-hex digit '%c' in universal-character-name", c);
1876 /* We shouldn't skip in case there are multibyte chars. */
1882 #ifdef TARGET_EBCDIC
1883 cpp_error (pfile, "universal-character-name on EBCDIC target");
1884 code = 0x3f; /* EBCDIC invalid character */
1886 /* True extended characters are OK. */
1888 && !(code & 0x80000000)
1889 && !(code >= 0xD800 && code <= 0xDFFF))
1891 /* The standard permits $, @ and ` to be specified as UCNs. We use
1892 hex escapes so that this also works with EBCDIC hosts. */
1893 else if (code == 0x24 || code == 0x40 || code == 0x60)
1895 /* Don't give another error if one occurred above. */
1896 else if (length == 0)
1897 cpp_error (pfile, "universal-character-name out of range");
1905 /* Interpret an escape sequence, and return its value. PSTR points to
1906 the input pointer, which is just after the backslash. LIMIT is how
1907 much text we have. MASK is a bitmask for the precision for the
1908 destination type (char or wchar_t). TRADITIONAL, if true, does not
1909 interpret escapes that did not exist in traditional C.
1911 Handles all relevant diagnostics. */
1914 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1916 const unsigned char **pstr;
1917 const unsigned char *limit;
1918 unsigned HOST_WIDE_INT mask;
1922 const unsigned char *str = *pstr;
1923 unsigned int c = *str++;
1927 case '\\': case '\'': case '"': case '?': break;
1928 case 'b': c = TARGET_BS; break;
1929 case 'f': c = TARGET_FF; break;
1930 case 'n': c = TARGET_NEWLINE; break;
1931 case 'r': c = TARGET_CR; break;
1932 case 't': c = TARGET_TAB; break;
1933 case 'v': c = TARGET_VT; break;
1935 case '(': case '{': case '[': case '%':
1936 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1937 '\%' is used to prevent SCCS from getting confused. */
1938 unknown = CPP_PEDANTIC (pfile);
1942 if (CPP_WTRADITIONAL (pfile))
1943 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1949 if (CPP_PEDANTIC (pfile))
1950 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1955 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1959 if (CPP_WTRADITIONAL (pfile))
1960 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1964 unsigned int i = 0, overflow = 0;
1965 int digits_found = 0;
1973 overflow |= i ^ (i << 4 >> 4);
1974 i = (i << 4) + hex_digit_value (c);
1979 cpp_error (pfile, "\\x used with no following hex digits");
1981 if (overflow | (i != (i & mask)))
1983 cpp_pedwarn (pfile, "hex escape sequence out of range");
1990 case '0': case '1': case '2': case '3':
1991 case '4': case '5': case '6': case '7':
1993 unsigned int i = c - '0';
1996 while (str < limit && ++count < 3)
1999 if (c < '0' || c > '7')
2002 i = (i << 3) + c - '0';
2005 if (i != (i & mask))
2007 cpp_pedwarn (pfile, "octal escape sequence out of range");
2022 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
2024 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
2028 cpp_pedwarn (pfile, "escape sequence out of range for character");
2034 #ifndef MAX_CHAR_TYPE_SIZE
2035 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
2038 #ifndef MAX_WCHAR_TYPE_SIZE
2039 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
2042 /* Interpret a (possibly wide) character constant in TOKEN.
2043 WARN_MULTI warns about multi-character charconsts, if not
2044 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2045 that did not exist in traditional C. PCHARS_SEEN points to a
2046 variable that is filled in with the number of characters seen. */
2048 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2050 const cpp_token *token;
2053 unsigned int *pchars_seen;
2055 const unsigned char *str = token->val.str.text;
2056 const unsigned char *limit = str + token->val.str.len;
2057 unsigned int chars_seen = 0;
2058 unsigned int width, max_chars, c;
2059 unsigned HOST_WIDE_INT mask;
2060 HOST_WIDE_INT result = 0;
2062 #ifdef MULTIBYTE_CHARS
2063 (void) local_mbtowc (NULL, NULL, 0);
2066 /* Width in bits. */
2067 if (token->type == CPP_CHAR)
2068 width = MAX_CHAR_TYPE_SIZE;
2070 width = MAX_WCHAR_TYPE_SIZE;
2072 if (width < HOST_BITS_PER_WIDE_INT)
2073 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2076 max_chars = HOST_BITS_PER_WIDE_INT / width;
2080 #ifdef MULTIBYTE_CHARS
2084 char_len = local_mbtowc (&wc, str, limit - str);
2087 cpp_warning (pfile, "ignoring invalid multibyte character");
2100 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2102 #ifdef MAP_CHARACTER
2104 c = MAP_CHARACTER (c);
2107 /* Merge character into result; ignore excess chars. */
2108 if (++chars_seen <= max_chars)
2110 if (width < HOST_BITS_PER_WIDE_INT)
2111 result = (result << width) | (c & mask);
2117 if (chars_seen == 0)
2118 cpp_error (pfile, "empty character constant");
2119 else if (chars_seen > max_chars)
2121 chars_seen = max_chars;
2122 cpp_warning (pfile, "character constant too long");
2124 else if (chars_seen > 1 && !traditional && warn_multi)
2125 cpp_warning (pfile, "multi-character character constant");
2127 /* If char type is signed, sign-extend the constant. The
2128 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2129 if (token->type == CPP_CHAR && chars_seen)
2131 unsigned int nbits = chars_seen * width;
2132 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2134 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2135 || ((result >> (nbits - 1)) & 1) == 0)
2141 *pchars_seen = chars_seen;
2157 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2160 chunk_suitable (pool, chunk, size)
2165 /* Being at least twice SIZE means we can use memcpy in
2166 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2168 return (chunk && pool->locked != chunk
2169 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2172 /* Returns the end of the new pool. PTR points to a char in the old
2173 pool, and is updated to point to the same char in the new pool. */
2175 _cpp_next_chunk (pool, len, ptr)
2178 unsigned char **ptr;
2180 cpp_chunk *chunk = pool->cur->next;
2182 /* LEN is the minimum size we want in the new pool. */
2183 len += POOL_ROOM (pool);
2184 if (! chunk_suitable (pool, chunk, len))
2186 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2188 chunk->next = pool->cur->next;
2189 pool->cur->next = chunk;
2192 /* Update the pointer before changing chunk's front. */
2194 *ptr += chunk->base - POOL_FRONT (pool);
2196 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2197 chunk->front = chunk->base;
2200 return POOL_LIMIT (pool);
2207 unsigned char *base;
2210 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2211 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2212 /* Put the chunk descriptor at the end. Then chunk overruns will
2213 cause obvious chaos. */
2214 result = (cpp_chunk *) (base + size);
2215 result->base = base;
2216 result->front = base;
2217 result->limit = base + size;
2224 _cpp_init_pool (pool, size, align, temp)
2226 unsigned int size, align, temp;
2229 align = DEFAULT_ALIGNMENT;
2230 if (align & (align - 1))
2232 pool->align = align;
2233 pool->cur = new_chunk (size);
2237 pool->cur->next = pool->cur;
2241 _cpp_lock_pool (pool)
2244 if (pool->locks++ == 0)
2245 pool->locked = pool->cur;
2249 _cpp_unlock_pool (pool)
2252 if (--pool->locks == 0)
2257 _cpp_free_pool (pool)
2260 cpp_chunk *chunk = pool->cur, *next;
2268 while (chunk && chunk != pool->cur);
2271 /* Reserve LEN bytes from a memory pool. */
2273 _cpp_pool_reserve (pool, len)
2277 len = POOL_ALIGN (len, pool->align);
2278 if (len > (unsigned int) POOL_ROOM (pool))
2279 _cpp_next_chunk (pool, len, 0);
2281 return POOL_FRONT (pool);
2284 /* Allocate LEN bytes from a memory pool. */
2286 _cpp_pool_alloc (pool, len)
2290 unsigned char *result = _cpp_pool_reserve (pool, len);
2292 POOL_COMMIT (pool, len);