git.gag.com Git - fw/sdcc/blob - support/cpp2/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27 #include <assert.h>
  28
  29 #ifdef MULTIBYTE_CHARS
  30 #include "mbchar.h"
  31 #include <locale.h>
  32 #endif
  33
  34 /* Tokens with SPELL_STRING store their spelling in the token list,
  35    and it's length in the token->val.name.len.  */
  36 enum spell_type
  37 {
  38   SPELL_OPERATOR = 0,
  39   SPELL_CHAR,
  40   SPELL_IDENT,
  41   SPELL_NUMBER,
  42   SPELL_STRING,
  43   SPELL_NONE
  44 };
  45
  46 struct token_spelling
  47 {
  48   enum spell_type category;
  49   const unsigned char *name;
  50 };
  51
  52 static const unsigned char *const digraph_spellings[] =
  53 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  54
  55 #define OP(e, s) { SPELL_OPERATOR, U s           },
  56 #define TK(e, s) { s,              U STRINGX (e) },
  57 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  58 #undef OP
  59 #undef TK
  60
  61 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  62 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  63 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  64
  65 static void handle_newline PARAMS ((cpp_reader *));
  66 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  67 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  68
  69 static int skip_asm_block PARAMS ((cpp_reader *));
  70 static int skip_block_comment PARAMS ((cpp_reader *));
  71 static int skip_line_comment PARAMS ((cpp_reader *));
  72 static void adjust_column PARAMS ((cpp_reader *));
  73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  76                                   unsigned int *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  80 static bool trigraph_p PARAMS ((cpp_reader *));
  81 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
  82 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  83 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  84                                   cppchar_t));
  85 static bool continue_after_nul PARAMS ((cpp_reader *));
  86 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  87 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  88                                    const unsigned char *, cppchar_t *));
  89 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  90
  91 static unsigned int hex_digit_value PARAMS ((unsigned int));
  92 static _cpp_buff *new_buff PARAMS ((size_t));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98 int
  99 cpp_ideq (token, string)
 100      const cpp_token *token;
 101      const char *string;
 102 {
 103   if (token->type != CPP_NAME)
 104     return 0;
 105
 106   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 107 }
 108
 109 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 110    Returns with buffer->cur pointing to the character immediately
 111    following the newline (combination).  */
 112 static void
 113 handle_newline (pfile)
 114      cpp_reader *pfile;
 115 {
 116   cpp_buffer *buffer = pfile->buffer;
 117
 118   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 119      only accept CR-LF; maybe we should fall back to that behavior?  */
 120   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 121     buffer->cur++;
 122
 123   buffer->line_base = buffer->cur;
 124   buffer->col_adjust = 0;
 125   pfile->line++;
 126 }
 127
 128 /* Subroutine of skip_escaped_newlines; called when a 3-character
 129    sequence beginning with "??" is encountered.  buffer->cur points to
 130    the second '?'.
 131
 132    Warn if necessary, and returns true if the sequence forms a
 133    trigraph and the trigraph should be honored.  */
 134 static bool
 135 trigraph_p (pfile)
 136      cpp_reader *pfile;
 137 {
 138   cpp_buffer *buffer = pfile->buffer;
 139   cppchar_t from_char = buffer->cur[1];
 140   bool accept;
 141
 142   if (!_cpp_trigraph_map[from_char])
 143     return false;
 144
 145   accept = CPP_OPTION (pfile, trigraphs);
 146
 147   /* Don't warn about trigraphs in comments.  */
 148   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 149     {
 150       if (accept)
 151         cpp_error_with_line (pfile, DL_WARNING,
 152                              pfile->line, CPP_BUF_COL (buffer) - 1,
 153                              "trigraph ??%c converted to %c",
 154                              (int) from_char,
 155                              (int) _cpp_trigraph_map[from_char]);
 156       else if (buffer->cur != buffer->last_Wtrigraphs)
 157         {
 158           buffer->last_Wtrigraphs = buffer->cur;
 159           cpp_error_with_line (pfile, DL_WARNING,
 160                                pfile->line, CPP_BUF_COL (buffer) - 1,
 161                                "trigraph ??%c ignored", (int) from_char);
 162         }
 163     }
 164
 165   return accept;
 166 }
 167
 168 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 169    lie in buffer->cur[-1].  Returns the next byte, which will be in
 170    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 171    2 of the ISO C standard.  */
 172 static cppchar_t
 173 skip_escaped_newlines (pfile)
 174      cpp_reader *pfile;
 175 {
 176   cpp_buffer *buffer = pfile->buffer;
 177   cppchar_t next = buffer->cur[-1];
 178
 179   /* Only do this if we apply stages 1 and 2.  */
 180   if (!buffer->from_stage3)
 181     {
 182       const unsigned char *saved_cur;
 183       cppchar_t next1;
 184
 185       do
 186         {
 187           if (next == '?')
 188             {
 189               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 190                 break;
 191
 192               /* Translate the trigraph.  */
 193               next = _cpp_trigraph_map[buffer->cur[1]];
 194               buffer->cur += 2;
 195               if (next != '\\')
 196                 break;
 197             }
 198
 199           if (buffer->cur == buffer->rlimit)
 200             break;
 201
 202           /* We have a backslash, and room for at least one more
 203              character.  Skip horizontal whitespace.  */
 204           saved_cur = buffer->cur;
 205           do
 206             next1 = *buffer->cur++;
 207           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 208
 209           if (!is_vspace (next1))
 210             {
 211               buffer->cur = saved_cur;
 212               break;
 213             }
 214
 215           if (saved_cur != buffer->cur - 1
 216               && !pfile->state.lexing_comment)
 217             cpp_error (pfile, DL_WARNING,
 218                        "backslash and newline separated by space");
 219
 220           handle_newline (pfile);
 221           buffer->backup_to = buffer->cur;
 222           if (buffer->cur == buffer->rlimit)
 223             {
 224               cpp_error (pfile, DL_PEDWARN,
 225                          "backslash-newline at end of file");
 226               next = EOF;
 227             }
 228           else
 229             next = *buffer->cur++;
 230         }
 231       while (next == '\\' || next == '?');
 232     }
 233
 234   return next;
 235 }
 236
 237 /* Obtain the next character, after trigraph conversion and skipping
 238    an arbitrarily long string of escaped newlines.  The common case of
 239    no trigraphs or escaped newlines falls through quickly.  On return,
 240    buffer->backup_to points to where to return to if the character is
 241    not to be processed.  */
 242 static cppchar_t
 243 get_effective_char (pfile)
 244      cpp_reader *pfile;
 245 {
 246   cppchar_t next;
 247   cpp_buffer *buffer = pfile->buffer;
 248
 249   buffer->backup_to = buffer->cur;
 250   next = *buffer->cur++;
 251   if (__builtin_expect (next == '?' || next == '\\', 0))
 252     next = skip_escaped_newlines (pfile);
 253
 254   return next;
 255 }
 256
 257 /* SDCC _asm specific */
 258 /* Skip an _asm ... _endasm block.  We find the end of the comment by
 259    seeing _endasm.  Returns non-zero if _asm terminated by EOF, zero
 260    otherwise.  */
 261 static int
 262 skip_asm_block (pfile)
 263      cpp_reader *pfile;
 264 {
 265 #define _ENDASM_STR "endasm"
 266 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
 267
 268   cpp_buffer *buffer = pfile->buffer;
 269   cppchar_t c = EOF;
 270   int prev_space = 0;
 271   int ret = 1;
 272
 273   pfile->state.lexing_comment = 1;
 274   while (buffer->cur != buffer->rlimit)
 275     {
 276       prev_space = is_space(c);
 277       c = *buffer->cur++;
 278
 279       /* FIXME: For speed, create a new character class of characters
 280          of interest inside block comments.  */
 281       if (c == '?' || c == '\\')
 282         c = skip_escaped_newlines (pfile);
 283
 284       if (prev_space && c == '_')
 285         {
 286           if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
 287             strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
 288             {
 289               buffer->cur += _ENDASM_LEN;
 290               ret = 0;
 291               break;
 292             }
 293         }
 294       else if (is_vspace (c))
 295         {
 296           prev_space = is_space(c);
 297           handle_newline (pfile);
 298         }
 299       else if (c == '\t')
 300         adjust_column (pfile);
 301     }
 302
 303   pfile->state.lexing_comment = 0;
 304   return ret;
 305 }
 306
 307 /* Skip a C-style block comment.  We find the end of the comment by
 308    seeing if an asterisk is before every '/' we encounter.  Returns
 309    nonzero if comment terminated by EOF, zero otherwise.  */
 310 static int
 311 skip_block_comment (pfile)
 312      cpp_reader *pfile;
 313 {
 314   cpp_buffer *buffer = pfile->buffer;
 315   cppchar_t c = EOF, prevc = EOF;
 316
 317   pfile->state.lexing_comment = 1;
 318   while (buffer->cur != buffer->rlimit)
 319     {
 320       prevc = c, c = *buffer->cur++;
 321
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (pfile);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimiter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 339             cpp_error_with_line (pfile, DL_WARNING,
 340                                  pfile->line, CPP_BUF_COL (buffer),
 341                                  "\"/*\" within comment");
 342         }
 343       else if (is_vspace (c))
 344         handle_newline (pfile);
 345       else if (c == '\t')
 346         adjust_column (pfile);
 347     }
 348
 349   pfile->state.lexing_comment = 0;
 350   return c != '/' || prevc != '*';
 351 }
 352
 353 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 354    terminating newline.  Handles escaped newlines.  Returns nonzero
 355    if a multiline comment.  */
 356 static int
 357 skip_line_comment (pfile)
 358      cpp_reader *pfile;
 359 {
 360   cpp_buffer *buffer = pfile->buffer;
 361   unsigned int orig_line = pfile->line;
 362   cppchar_t c;
 363 #ifdef MULTIBYTE_CHARS
 364   wchar_t wc;
 365   int char_len;
 366 #endif
 367
 368   pfile->state.lexing_comment = 1;
 369 #ifdef MULTIBYTE_CHARS
 370   /* Reset multibyte conversion state.  */
 371   (void) local_mbtowc (NULL, NULL, 0);
 372 #endif
 373   do
 374     {
 375       if (buffer->cur == buffer->rlimit)
 376         goto at_eof;
 377
 378 #ifdef MULTIBYTE_CHARS
 379       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 380                                buffer->rlimit - buffer->cur);
 381       if (char_len == -1)
 382         {
 383           cpp_error (pfile, DL_WARNING,
 384                      "ignoring invalid multibyte character");
 385           char_len = 1;
 386           c = *buffer->cur++;
 387         }
 388       else
 389         {
 390           buffer->cur += char_len;
 391           c = wc;
 392         }
 393 #else
 394       c = *buffer->cur++;
 395 #endif
 396       if (c == '?' || c == '\\')
 397         c = skip_escaped_newlines (pfile);
 398     }
 399   while (!is_vspace (c));
 400
 401   /* Step back over the newline, except at EOF.  */
 402   buffer->cur--;
 403  at_eof:
 404
 405   pfile->state.lexing_comment = 0;
 406   return orig_line != pfile->line;
 407 }
 408
 409 /* pfile->buffer->cur is one beyond the \t character.  Update
 410    col_adjust so we track the column correctly.  */
 411 static void
 412 adjust_column (pfile)
 413      cpp_reader *pfile;
 414 {
 415   cpp_buffer *buffer = pfile->buffer;
 416   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 417
 418   /* Round it up to multiple of the tabstop, but subtract 1 since the
 419      tab itself occupies a character position.  */
 420   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 421                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 422 }
 423
 424 /* Skips whitespace, saving the next non-whitespace character.
 425    Adjusts pfile->col_adjust to account for tabs.  Without this,
 426    tokens might be assigned an incorrect column.  */
 427 static int
 428 skip_whitespace (pfile, c)
 429      cpp_reader *pfile;
 430      cppchar_t c;
 431 {
 432   cpp_buffer *buffer = pfile->buffer;
 433   unsigned int warned = 0;
 434
 435   do
 436     {
 437       /* Horizontal space always OK.  */
 438       if (c == ' ')
 439         ;
 440       else if (c == '\t')
 441         adjust_column (pfile);
 442       /* Just \f \v or \0 left.  */
 443       else if (c == '\0')
 444         {
 445           if (buffer->cur - 1 == buffer->rlimit)
 446             return 0;
 447           if (!warned)
 448             {
 449               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 450               warned = 1;
 451             }
 452         }
 453       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 454         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 455                              CPP_BUF_COL (buffer),
 456                              "%s in preprocessing directive",
 457                              c == '\f' ? "form feed" : "vertical tab");
 458
 459       c = *buffer->cur++;
 460     }
 461   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 462   while (is_nvspace (c));
 463
 464   buffer->cur--;
 465   return 1;
 466 }
 467
 468 /* See if the characters of a number token are valid in a name (no
 469    '.', '+' or '-').  */
 470 static int
 471 name_p (pfile, string)
 472      cpp_reader *pfile;
 473      const cpp_string *string;
 474 {
 475   unsigned int i;
 476
 477   for (i = 0; i < string->len; i++)
 478     if (!is_idchar (string->text[i]))
 479       return 0;
 480
 481   return 1;
 482 }
 483
 484 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 485    a critical inner loop.  The common case is an identifier which has
 486    not been split by backslash-newline, does not contain a dollar
 487    sign, and has already been scanned (roughly 10:1 ratio of
 488    seen:unseen identifiers in normal code; the distribution is
 489    Poisson-like).  Second most common case is a new identifier, not
 490    split and no dollar sign.  The other possibilities are rare and
 491    have been relegated to parse_slow.  */
 492 static cpp_hashnode *
 493 parse_identifier (pfile)
 494      cpp_reader *pfile;
 495 {
 496   cpp_hashnode *result;
 497   const uchar *cur, *base;
 498
 499   /* Fast-path loop.  Skim over a normal identifier.
 500      N.B. ISIDNUM does not include $.  */
 501   cur = pfile->buffer->cur;
 502   while (ISIDNUM (*cur))
 503     cur++;
 504
 505   /* Check for slow-path cases.  */
 506   if (*cur == '?' || *cur == '\\' || *cur == '$')
 507     {
 508       unsigned int len;
 509
 510       base = parse_slow (pfile, cur, 0, &len);
 511       result = (cpp_hashnode *)
 512         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 513     }
 514   else
 515     {
 516       base = pfile->buffer->cur - 1;
 517       pfile->buffer->cur = cur;
 518       result = (cpp_hashnode *)
 519         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 520     }
 521
 522   /* Rarely, identifiers require diagnostics when lexed.
 523      XXX Has to be forced out of the fast path.  */
 524   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 525                         && !pfile->state.skipping, 0))
 526     {
 527       /* It is allowed to poison the same identifier twice.  */
 528       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 529         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 530                    NODE_NAME (result));
 531
 532       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 533          replacement list of a variadic macro.  */
 534       if (result == pfile->spec_nodes.n__VA_ARGS__
 535           && !pfile->state.va_args_ok)
 536         cpp_error (pfile, DL_PEDWARN,
 537         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 538     }
 539
 540   return result;
 541 }
 542
 543 /* Slow path.  This handles numbers and identifiers which have been
 544    split, or contain dollar signs.  The part of the token from
 545    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 546    1 if it's a number, and 2 if it has a leading period.  Returns a
 547    pointer to the token's NUL-terminated spelling in permanent
 548    storage, and sets PLEN to its length.  */
 549 static uchar *
 550 parse_slow (pfile, cur, number_p, plen)
 551      cpp_reader *pfile;
 552      const uchar *cur;
 553      int number_p;
 554      unsigned int *plen;
 555 {
 556   cpp_buffer *buffer = pfile->buffer;
 557   const uchar *base = buffer->cur - 1;
 558   struct obstack *stack = &pfile->hash_table->stack;
 559   unsigned int c, prevc, saw_dollar = 0;
 560
 561   /* Place any leading period.  */
 562   if (number_p == 2)
 563     obstack_1grow (stack, '.');
 564
 565   /* Copy the part of the token which is known to be okay.  */
 566   obstack_grow (stack, base, cur - base);
 567
 568   /* Now process the part which isn't.  We are looking at one of
 569      '$', '\\', or '?' on entry to this loop.  */
 570   prevc = cur[-1];
 571   c = *cur++;
 572   buffer->cur = cur;
 573   for (;;)
 574     {
 575       /* Potential escaped newline?  */
 576       buffer->backup_to = buffer->cur - 1;
 577       if (c == '?' || c == '\\')
 578         c = skip_escaped_newlines (pfile);
 579
 580       if (!is_idchar (c))
 581         {
 582           if (!number_p)
 583             break;
 584           if (c != '.' && !VALID_SIGN (c, prevc))
 585             break;
 586         }
 587
 588       /* Handle normal identifier characters in this loop.  */
 589       do
 590         {
 591           prevc = c;
 592           obstack_1grow (stack, c);
 593
 594           if (c == '$')
 595             saw_dollar++;
 596
 597           c = *buffer->cur++;
 598         }
 599       while (is_idchar (c));
 600     }
 601
 602   /* Step back over the unwanted char.  */
 603   BACKUP ();
 604
 605   /* $ is not an identifier character in the standard, but is commonly
 606      accepted as an extension.  Don't warn about it in skipped
 607      conditional blocks.  */
 608   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 609     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 610
 611   /* Identifiers and numbers are null-terminated.  */
 612   *plen = obstack_object_size (stack);
 613   obstack_1grow (stack, '\0');
 614   return obstack_finish (stack);
 615 }
 616
 617 /* Parse a number, beginning with character C, skipping embedded
 618    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 619    before C.  Place the result in NUMBER.  */
 620 static void
 621 parse_number (pfile, number, leading_period)
 622      cpp_reader *pfile;
 623      cpp_string *number;
 624      int leading_period;
 625 {
 626   const uchar *cur;
 627
 628   /* Fast-path loop.  Skim over a normal number.
 629      N.B. ISIDNUM does not include $.  */
 630   cur = pfile->buffer->cur;
 631   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 632     cur++;
 633
 634   /* Check for slow-path cases.  */
 635   if (*cur == '?' || *cur == '\\' || *cur == '$')
 636     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 637   else
 638     {
 639       const uchar *base = pfile->buffer->cur - 1;
 640       uchar *dest;
 641
 642       number->len = cur - base + leading_period;
 643       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 644       dest[number->len] = '\0';
 645       number->text = dest;
 646
 647       if (leading_period)
 648         *dest++ = '.';
 649       memcpy (dest, base, cur - base);
 650       pfile->buffer->cur = cur;
 651     }
 652 }
 653
 654 /* Subroutine of parse_string.  */
 655 static int
 656 unescaped_terminator_p (pfile, dest)
 657      cpp_reader *pfile;
 658      const unsigned char *dest;
 659 {
 660   const unsigned char *start, *temp;
 661
 662   /* In #include-style directives, terminators are not escapeable.  */
 663   if (pfile->state.angled_headers)
 664     return 1;
 665
 666   start = BUFF_FRONT (pfile->u_buff);
 667
 668   /* An odd number of consecutive backslashes represents an escaped
 669      terminator.  */
 670   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 671     ;
 672
 673   return ((dest - temp) & 1) == 0;
 674 }
 675
 676 /* Parses a string, character constant, or angle-bracketed header file
 677    name.  Handles embedded trigraphs and escaped newlines.  The stored
 678    string is guaranteed NUL-terminated, but it is not guaranteed that
 679    this is the first NUL since embedded NULs are preserved.
 680
 681    When this function returns, buffer->cur points to the next
 682    character to be processed.  */
 683 static void
 684 parse_string (pfile, token, terminator)
 685      cpp_reader *pfile;
 686      cpp_token *token;
 687      cppchar_t terminator;
 688 {
 689   cpp_buffer *buffer = pfile->buffer;
 690   unsigned char *dest, *limit;
 691   cppchar_t c;
 692   bool warned_nulls = false;
 693 #ifdef MULTIBYTE_CHARS
 694   wchar_t wc;
 695   int char_len;
 696 #endif
 697
 698   dest = BUFF_FRONT (pfile->u_buff);
 699   limit = BUFF_LIMIT (pfile->u_buff);
 700
 701 #ifdef MULTIBYTE_CHARS
 702   /* Reset multibyte conversion state.  */
 703   (void) local_mbtowc (NULL, NULL, 0);
 704 #endif
 705   for (;;)
 706     {
 707       /* We need room for another char, possibly the terminating NUL.  */
 708       if ((size_t) (limit - dest) < 1)
 709         {
 710           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 711           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 712           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 713           limit = BUFF_LIMIT (pfile->u_buff);
 714         }
 715
 716 #ifdef MULTIBYTE_CHARS
 717       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 718                                buffer->rlimit - buffer->cur);
 719       if (char_len == -1)
 720         {
 721           cpp_error (pfile, DL_WARNING,
 722                      "ignoring invalid multibyte character");
 723           char_len = 1;
 724           c = *buffer->cur++;
 725         }
 726       else
 727         {
 728           buffer->cur += char_len;
 729           c = wc;
 730         }
 731 #else
 732       c = *buffer->cur++;
 733 #endif
 734
 735       /* Handle trigraphs, escaped newlines etc.  */
 736       if (c == '?' || c == '\\')
 737         c = skip_escaped_newlines (pfile);
 738
 739       if (c == terminator)
 740         {
 741           if (unescaped_terminator_p (pfile, dest))
 742             break;
 743         }
 744       else if (is_vspace (c))
 745         {
 746           /* No string literal may extend over multiple lines.  In
 747              assembly language, suppress the error except for <>
 748              includes.  This is a kludge around not knowing where
 749              comments are.  */
 750         unterminated:
 751           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 752             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 753                        (int) terminator);
 754           buffer->cur--;
 755           break;
 756         }
 757       else if (c == '\0')
 758         {
 759           if (buffer->cur - 1 == buffer->rlimit)
 760             goto unterminated;
 761           if (!warned_nulls)
 762             {
 763               warned_nulls = true;
 764               cpp_error (pfile, DL_WARNING,
 765                          "null character(s) preserved in literal");
 766             }
 767         }
 768 #ifdef MULTIBYTE_CHARS
 769       if (char_len > 1)
 770         {
 771           for ( ; char_len > 0; --char_len)
 772             *dest++ = (*buffer->cur - char_len);
 773         }
 774       else
 775 #endif
 776         *dest++ = c;
 777     }
 778
 779   *dest = '\0';
 780
 781   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 782   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 783   BUFF_FRONT (pfile->u_buff) = dest + 1;
 784 }
 785
 786 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
 787    comment blocks (when executed with -C option) and
 788    _asm (SDCPP specific) blocks */
 789
 790 /* Count and copy characters from src to dest, excluding CRs:
 791    CRs are automatically generated, because the output is
 792    opened in TEXT mode. If dest == NULL, only count chars */
 793 static unsigned int
 794 copy_text_chars (dest, src, len)
 795      char *dest;
 796      const char *src;
 797      unsigned int len;
 798 {
 799   unsigned int n = 0;
 800   const char *p;
 801
 802   for (p = src; p != src + len; ++p)
 803     {
 804       assert(*p != '\0');
 805
 806       if (*p != '\r')
 807         {
 808           if (dest != NULL)
 809             *dest++ = *p;
 810           ++n;
 811         }
 812     }
 813
 814     return n;
 815 }
 816
 817 /* SDCC _asm specific */
 818 /* The stored comment includes the comment start and any terminator.  */
 819 static void
 820 save_asm (pfile, token, from)
 821      cpp_reader *pfile;
 822      cpp_token *token;
 823      const unsigned char *from;
 824 {
 825 #define _ASM_STR  "_asm"
 826 #define _ASM_LEN  ((sizeof _ASM_STR) - 1)
 827
 828   unsigned char *buffer;
 829   unsigned int text_len, len;
 830
 831   len = pfile->buffer->cur - from;
 832   /* + _ASM_LEN for the initial '_asm'.  */
 833   text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
 834   buffer = _cpp_unaligned_alloc (pfile, text_len);
 835
 836
 837   token->type = CPP_ASM;
 838   token->val.str.len = text_len;
 839   token->val.str.text = buffer;
 840
 841   memcpy (buffer, _ASM_STR, _ASM_LEN);
 842   copy_text_chars (buffer + _ASM_LEN, from, len);
 843 }
 844
 845 /* The stored comment includes the comment start and any terminator.  */
 846 static void
 847 save_comment (pfile, token, from, type)
 848      cpp_reader *pfile;
 849      cpp_token *token;
 850      const unsigned char *from;
 851      cppchar_t type;
 852 {
 853   unsigned char *buffer;
 854   unsigned int len, clen;
 855
 856   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 857
 858   /* C++ comments probably (not definitely) have moved past a new
 859      line, which we don't want to save in the comment.  */
 860   if (is_vspace (pfile->buffer->cur[-1]))
 861     len--;
 862
 863   /* If we are currently in a directive, then we need to store all
 864      C++ comments as C comments internally, and so we need to
 865      allocate a little extra space in that case.
 866
 867      Note that the only time we encounter a directive here is
 868      when we are saving comments in a "#define".  */
 869   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 870
 871   buffer = _cpp_unaligned_alloc (pfile, clen);
 872
 873   token->type = CPP_COMMENT;
 874   token->val.str.len = clen;
 875   token->val.str.text = buffer;
 876
 877   buffer[0] = '/';
 878   copy_text_chars (buffer + 1, from, len);
 879
 880   /* Finish conversion to a C comment, if necessary.  */
 881   if (pfile->state.in_directive && type == '/')
 882     {
 883       buffer[1] = '*';
 884       buffer[clen - 2] = '*';
 885       buffer[clen - 1] = '/';
 886     }
 887 }
 888
 889 /* Allocate COUNT tokens for RUN.  */
 890 void
 891 _cpp_init_tokenrun (run, count)
 892      tokenrun *run;
 893      unsigned int count;
 894 {
 895   run->base = xnewvec (cpp_token, count);
 896   run->limit = run->base + count;
 897   run->next = NULL;
 898 }
 899
 900 /* Returns the next tokenrun, or creates one if there is none.  */
 901 static tokenrun *
 902 next_tokenrun (run)
 903      tokenrun *run;
 904 {
 905   if (run->next == NULL)
 906     {
 907       run->next = xnew (tokenrun);
 908       run->next->prev = run;
 909       _cpp_init_tokenrun (run->next, 250);
 910     }
 911
 912   return run->next;
 913 }
 914
 915 /* Allocate a single token that is invalidated at the same time as the
 916    rest of the tokens on the line.  Has its line and col set to the
 917    same as the last lexed token, so that diagnostics appear in the
 918    right place.  */
 919 cpp_token *
 920 _cpp_temp_token (pfile)
 921      cpp_reader *pfile;
 922 {
 923   cpp_token *old, *result;
 924
 925   old = pfile->cur_token - 1;
 926   if (pfile->cur_token == pfile->cur_run->limit)
 927     {
 928       pfile->cur_run = next_tokenrun (pfile->cur_run);
 929       pfile->cur_token = pfile->cur_run->base;
 930     }
 931
 932   result = pfile->cur_token++;
 933   result->line = old->line;
 934   result->col = old->col;
 935   return result;
 936 }
 937
 938 /* Lex a token into RESULT (external interface).  Takes care of issues
 939    like directive handling, token lookahead, multiple include
 940    optimization and skipping.  */
 941 const cpp_token *
 942 _cpp_lex_token (pfile)
 943      cpp_reader *pfile;
 944 {
 945   cpp_token *result;
 946
 947   for (;;)
 948     {
 949       if (pfile->cur_token == pfile->cur_run->limit)
 950         {
 951           pfile->cur_run = next_tokenrun (pfile->cur_run);
 952           pfile->cur_token = pfile->cur_run->base;
 953         }
 954
 955       if (pfile->lookaheads)
 956         {
 957           pfile->lookaheads--;
 958           result = pfile->cur_token++;
 959         }
 960       else
 961         result = _cpp_lex_direct (pfile);
 962
 963       if (result->flags & BOL)
 964         {
 965           /* Is this a directive.  If _cpp_handle_directive returns
 966              false, it is an assembler #.  */
 967           if (result->type == CPP_HASH
 968               /* 6.10.3 p 11: Directives in a list of macro arguments
 969                  gives undefined behavior.  This implementation
 970                  handles the directive as normal.  */
 971               && pfile->state.parsing_args != 1
 972               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 973             continue;
 974           if (pfile->cb.line_change && !pfile->state.skipping)
 975             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 976         }
 977
 978       /* We don't skip tokens in directives.  */
 979       if (pfile->state.in_directive)
 980         break;
 981
 982       /* Outside a directive, invalidate controlling macros.  At file
 983          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 984          get here and MI optimisation works.  */
 985       pfile->mi_valid = false;
 986
 987       if (!pfile->state.skipping || result->type == CPP_EOF)
 988         break;
 989     }
 990
 991   return result;
 992 }
 993
 994 /* A NUL terminates the current buffer.  For ISO preprocessing this is
 995    EOF, but for traditional preprocessing it indicates we need a line
 996    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
 997    to return a CPP_EOF to the caller.  */
 998 static bool
 999 continue_after_nul (pfile)
1000      cpp_reader *pfile;
1001 {
1002   cpp_buffer *buffer = pfile->buffer;
1003   bool more = false;
1004
1005   buffer->saved_flags = BOL;
1006   if (CPP_OPTION (pfile, traditional))
1007     {
1008       if (pfile->state.in_directive)
1009         return false;
1010
1011       _cpp_remove_overlay (pfile);
1012       more = _cpp_read_logical_line_trad (pfile);
1013       _cpp_overlay_buffer (pfile, pfile->out.base,
1014                            pfile->out.cur - pfile->out.base);
1015       pfile->line = pfile->out.first_line;
1016     }
1017   else
1018     {
1019       /* Stop parsing arguments with a CPP_EOF.  When we finally come
1020          back here, do the work of popping the buffer.  */
1021       if (!pfile->state.parsing_args)
1022         {
1023           if (buffer->cur != buffer->line_base)
1024             {
1025               /* Non-empty files should end in a newline.  Don't warn
1026                  for command line and _Pragma buffers.  */
1027               if (!buffer->from_stage3)
1028                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1029               handle_newline (pfile);
1030             }
1031
1032           /* Similarly, finish an in-progress directive with CPP_EOF
1033              before popping the buffer.  */
1034           if (!pfile->state.in_directive && buffer->prev)
1035             {
1036               more = !buffer->return_at_eof;
1037               _cpp_pop_buffer (pfile);
1038             }
1039         }
1040     }
1041
1042   return more;
1043 }
1044
/* Helper for two-character operators.  Reads the next effective
   character; if it is CHAR the token type becomes THEN_TYPE,
   otherwise BACKUP () is invoked and the type becomes ELSE_TYPE.
   Relies on `pfile' and `result' being in scope at the point of
   use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) != CHAR)     \
      {                                         \
        BACKUP ();                              \
        result->type = ELSE_TYPE;               \
      }                                         \
    else                                        \
      result->type = THEN_TYPE;                 \
  } while (0)
1055
1056 /* Lex a token into pfile->cur_token, which is also incremented, to
1057    get diagnostics pointing to the correct location.
1058
1059    Does not handle issues such as token lookahead, multiple-include
1060    optimisation, directives, skipping etc.  This function is only
1061    suitable for use by _cpp_lex_token, and in special cases like
1062    lex_expansion_token which doesn't care for any of these issues.
1063    At the end of a buffer, lexing carries on if continue_after_nul returns true; otherwise the token is CPP_EOF.
1064    When meeting a newline, returns CPP_EOF if parsing a directive,
1065    otherwise returns to the start of the token buffer if permissible.
1066    Returns the location of the lexed token.  */
1067 cpp_token *
1068 _cpp_lex_direct (pfile)
1069      cpp_reader *pfile;
1070 {
1071   cppchar_t c;
1072   cpp_buffer *buffer;
1073   const unsigned char *comment_start;
1074   cpp_token *result = pfile->cur_token++;
1075   /* Start of a line or buffer: move the buffer's saved flags (e.g. BOL) onto the token and clear them.  */
1076  fresh_line:
1077   buffer = pfile->buffer;
1078   result->flags = buffer->saved_flags;
1079   buffer->saved_flags = 0;
1080  update_tokens_line: /* Re-stamp the token with the current line number.  */
1081   result->line = pfile->line;
1082   /* Fetch the next character and record the token's column.  */
1083  skipped_white:
1084   c = *buffer->cur++;
1085   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1086   /* Re-entered from the '?' and backslash case when skip_escaped_newlines produced some other character.  */
1087  trigraph:
1088   switch (c)
1089     {
1090     case ' ': case '\t': case '\f': case '\v': case '\0':
1091       result->flags |= PREV_WHITE;
1092       if (skip_whitespace (pfile, c))
1093         goto skipped_white;
1094
1095       /* End of buffer.  */
1096       buffer->cur--;
1097       if (continue_after_nul (pfile))
1098         goto fresh_line;
1099       result->type = CPP_EOF;
1100       break;
1101
1102     case '\n': case '\r':
1103       handle_newline (pfile);
1104       buffer->saved_flags = BOL;
1105       if (! pfile->state.in_directive)
1106         {
1107           if (pfile->state.parsing_args == 2)
1108             buffer->saved_flags |= PREV_WHITE;
1109           if (!pfile->keep_tokens)
1110             { /* Tokens need not be kept: go back to the first slot of the base run and lex into it.  */
1111               pfile->cur_run = &pfile->base_run;
1112               result = pfile->base_run.base;
1113               pfile->cur_token = result + 1;
1114             }
1115           goto fresh_line;
1116         }
1117       result->type = CPP_EOF; /* Only reached inside a directive: the newline ends it.  */
1118       break;
1119
1120     case '?':
1121     case '\\':
1122       /* These could start an escaped newline, or '?' a trigraph.  Let
1123          skip_escaped_newlines do all the work.  */
1124       {
1125         unsigned int line = pfile->line;
1126
1127         c = skip_escaped_newlines (pfile);
1128         if (line != pfile->line)
1129           {
1130             buffer->cur--;
1131             /* We had at least one escaped newline of some sort.
1132                Update the token's line and column.  */
1133             goto update_tokens_line;
1134           }
1135       }
1136
1137       /* We are either the original '?' or '\\', or a trigraph.  */
1138       if (c == '?')
1139         result->type = CPP_QUERY;
1140       else if (c == '\\')
1141         goto random_char;
1142       else
1143         goto trigraph;
1144       break;
1145
1146     case '0': case '1': case '2': case '3': case '4':
1147     case '5': case '6': case '7': case '8': case '9':
1148       result->type = CPP_NUMBER;
1149       parse_number (pfile, &result->val.str, 0);
1150       break;
1151
1152     case 'L':
1153       /* 'L' may introduce wide characters or strings.  */
1154       {
1155         const unsigned char *pos = buffer->cur;
1156
1157         c = get_effective_char (pfile);
1158         if (c == '\'' || c == '"')
1159           {
1160             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1161             parse_string (pfile, result, c);
1162             break;
1163           }
1164         buffer->cur = pos; /* Not a wide literal: restore the read position and lex an identifier.  */
1165       }
1166       /* Fall through.  */
1167
1168     start_ident: /* Also reached from the '$' case when dollars are allowed in identifiers.  */
1169     case '_':
1170     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1171     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1172     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1173     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1174     case 'y': case 'z':
1175     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1176     case 'G': case 'H': case 'I': case 'J': case 'K': /* 'L' has its own case above.  */
1177     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1178     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1179     case 'Y': case 'Z':
1180       result->type = CPP_NAME;
1181       result->val.node = parse_identifier (pfile);
1182
1183       /* SDCC _asm specific */
1184       /* handle _asm ... _endasm ;  */
1185       if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1186         {
1187           comment_start = buffer->cur;
1188           result->type = CPP_ASM;
1189           skip_asm_block (pfile);
1190           /* Save the _asm block as a token in its own right.  */
1191           save_asm (pfile, result, comment_start);
1192         }
1193       /* Convert named operators to their proper types.  */
1194       else if (result->val.node->flags & NODE_OPERATOR)
1195         {
1196           result->flags |= NAMED_OP;
1197           result->type = result->val.node->value.operator;
1198         }
1199       break;
1200
1201     case '\'':
1202     case '"':
1203       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1204       parse_string (pfile, result, c);
1205       break;
1206
1207     case '/':
1208       /* A potential block or line comment.  */
1209       comment_start = buffer->cur;
1210       c = get_effective_char (pfile);
1211
1212       if (c == '*')
1213         {
1214           if (skip_block_comment (pfile))
1215             cpp_error (pfile, DL_ERROR, "unterminated comment");
1216         }
1217       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1218                             || CPP_IN_SYSTEM_HEADER (pfile)))
1219         {
1220           /* Warn about comments only if pedantically GNUC89, and not
1221              in system headers.  */
1222           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1223               && ! buffer->warned_cplusplus_comments)
1224             {
1225               cpp_error (pfile, DL_PEDWARN,
1226                          "C++ style comments are not allowed in ISO C90");
1227               cpp_error (pfile, DL_PEDWARN,
1228                          "(this will be reported only once per input file)");
1229               buffer->warned_cplusplus_comments = 1;
1230             }
1231
1232           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1233             cpp_error (pfile, DL_WARNING, "multi-line comment");
1234         }
1235       else if (c == '=')
1236         {
1237           result->type = CPP_DIV_EQ;
1238           break;
1239         }
1240       else
1241         {
1242           BACKUP ();
1243           result->type = CPP_DIV;
1244           break;
1245         }
1246       /* Only a skipped comment gets this far; the two operator branches above break out of the switch.  */
1247       if (!pfile->state.save_comments)
1248         {
1249           result->flags |= PREV_WHITE;
1250           goto update_tokens_line; /* Comment not saved: mark the whitespace and lex the token that follows.  */
1251         }
1252
1253       /* Save the comment as a token in its own right.  */
1254       save_comment (pfile, result, comment_start, c);
1255       break;
1256
1257     case '<':
1258       if (pfile->state.angled_headers)
1259         {
1260           result->type = CPP_HEADER_NAME;
1261           parse_string (pfile, result, '>');
1262           break;
1263         }
1264
1265       c = get_effective_char (pfile);
1266       if (c == '=')
1267         result->type = CPP_LESS_EQ;
1268       else if (c == '<')
1269         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1270       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1271         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1272       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1273         {
1274           result->type = CPP_OPEN_SQUARE;
1275           result->flags |= DIGRAPH;
1276         }
1277       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1278         {
1279           result->type = CPP_OPEN_BRACE;
1280           result->flags |= DIGRAPH;
1281         }
1282       else
1283         {
1284           BACKUP ();
1285           result->type = CPP_LESS;
1286         }
1287       break;
1288
1289     case '>':
1290       c = get_effective_char (pfile);
1291       if (c == '=')
1292         result->type = CPP_GREATER_EQ;
1293       else if (c == '>')
1294         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1295       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1296         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1297       else
1298         {
1299           BACKUP ();
1300           result->type = CPP_GREATER;
1301         }
1302       break;
1303
1304     case '%':
1305       c = get_effective_char (pfile);
1306       if (c == '=')
1307         result->type = CPP_MOD_EQ;
1308       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1309         {
1310           result->flags |= DIGRAPH;
1311           result->type = CPP_HASH;
1312           if (get_effective_char (pfile) == '%')
1313             {
1314               const unsigned char *pos = buffer->cur;
1315
1316               if (get_effective_char (pfile) == ':')
1317                 result->type = CPP_PASTE;
1318               else
1319                 buffer->cur = pos - 1; /* Not the four-character paste digraph: rewind to one byte before POS, presumably onto the second '%'.  */
1320             }
1321           else
1322             BACKUP ();
1323         }
1324       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1325         {
1326           result->flags |= DIGRAPH;
1327           result->type = CPP_CLOSE_BRACE;
1328         }
1329       else
1330         {
1331           BACKUP ();
1332           result->type = CPP_MOD;
1333         }
1334       break;
1335
1336     case '.':
1337       result->type = CPP_DOT;
1338       c = get_effective_char (pfile);
1339       if (c == '.')
1340         {
1341           const unsigned char *pos = buffer->cur;
1342
1343           if (get_effective_char (pfile) == '.')
1344             result->type = CPP_ELLIPSIS;
1345           else
1346             buffer->cur = pos - 1; /* Only two dots: rewind to one byte before POS, presumably onto the second '.'.  */
1347         }
1348       /* All known character sets have 0...9 contiguous.  */
1349       else if (ISDIGIT (c))
1350         {
1351           result->type = CPP_NUMBER;
1352           parse_number (pfile, &result->val.str, 1);
1353         }
1354       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1355         result->type = CPP_DOT_STAR;
1356       else
1357         BACKUP ();
1358       break;
1359
1360     case '+':
1361       c = get_effective_char (pfile);
1362       if (c == '+')
1363         result->type = CPP_PLUS_PLUS;
1364       else if (c == '=')
1365         result->type = CPP_PLUS_EQ;
1366       else
1367         {
1368           BACKUP ();
1369           result->type = CPP_PLUS;
1370         }
1371       break;
1372
1373     case '-':
1374       c = get_effective_char (pfile);
1375       if (c == '>')
1376         {
1377           result->type = CPP_DEREF;
1378           if (CPP_OPTION (pfile, cplusplus))
1379             {
1380               if (get_effective_char (pfile) == '*')
1381                 result->type = CPP_DEREF_STAR;
1382               else
1383                 BACKUP ();
1384             }
1385         }
1386       else if (c == '-')
1387         result->type = CPP_MINUS_MINUS;
1388       else if (c == '=')
1389         result->type = CPP_MINUS_EQ;
1390       else
1391         {
1392           BACKUP ();
1393           result->type = CPP_MINUS;
1394         }
1395       break;
1396
1397     case '&':
1398       c = get_effective_char (pfile);
1399       if (c == '&')
1400         result->type = CPP_AND_AND;
1401       else if (c == '=')
1402         result->type = CPP_AND_EQ;
1403       else
1404         {
1405           BACKUP ();
1406           result->type = CPP_AND;
1407         }
1408       break;
1409
1410     case '|':
1411       c = get_effective_char (pfile);
1412       if (c == '|')
1413         result->type = CPP_OR_OR;
1414       else if (c == '=')
1415         result->type = CPP_OR_EQ;
1416       else
1417         {
1418           BACKUP ();
1419           result->type = CPP_OR;
1420         }
1421       break;
1422
1423     case ':':
1424       c = get_effective_char (pfile);
1425       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1426         result->type = CPP_SCOPE;
1427       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1428         {
1429           result->flags |= DIGRAPH;
1430           result->type = CPP_CLOSE_SQUARE;
1431         }
1432       else
1433         {
1434           BACKUP ();
1435           result->type = CPP_COLON;
1436         }
1437       break;
1438
1439     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1440     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1441     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1442     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1443     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1444
1445     case '~': result->type = CPP_COMPL; break;
1446     case ',': result->type = CPP_COMMA; break;
1447     case '(': result->type = CPP_OPEN_PAREN; break;
1448     case ')': result->type = CPP_CLOSE_PAREN; break;
1449     case '[': result->type = CPP_OPEN_SQUARE; break;
1450     case ']': result->type = CPP_CLOSE_SQUARE; break;
1451     case '{': result->type = CPP_OPEN_BRACE; break;
1452     case '}': result->type = CPP_CLOSE_BRACE; break;
1453     case ';': result->type = CPP_SEMICOLON; break;
1454
1455       /* @ is a punctuator in Objective-C.  */
1456     case '@': result->type = CPP_ATSIGN; break;
1457
1458     case '$':
1459       if (CPP_OPTION (pfile, dollars_in_ident))
1460         goto start_ident;
1461       /* Fall through...  */
1462
1463     random_char: /* Also reached for a lone backslash that starts no escaped newline.  */
1464     default:
1465       result->type = CPP_OTHER;
1466       result->val.c = c;
1467       break;
1468     }
1469
1470   return result;
1471 }
1472
1473 /* An upper bound on the number of bytes needed to spell TOKEN,
1474    including preceding whitespace.  */
1475 unsigned int
1476 cpp_token_len (token)
1477      const cpp_token *token;
1478 {
1479   unsigned int len;
1480
1481   switch (TOKEN_SPELL (token))
1482     {
1483     default:            len = 0;                                break;
1484     case SPELL_NUMBER:
1485     case SPELL_STRING:  len = token->val.str.len;               break;
1486     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1487     }
1488   /* 1 for whitespace, 4 for comment delimiters.  */
1489   return len + 5;
1490 }
1491
1492 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1493    already contain the enough space to hold the token's spelling.
1494    Returns a pointer to the character after the last character
1495    written.  */
1496 unsigned char *
1497 cpp_spell_token (pfile, token, buffer)
1498      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1499      const cpp_token *token;
1500      unsigned char *buffer;
1501 {
1502   switch (TOKEN_SPELL (token))
1503     {
1504     case SPELL_OPERATOR:
1505       {
1506         const unsigned char *spelling;
1507         unsigned char c;
1508
1509         if (token->flags & DIGRAPH)
1510           spelling
1511             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1512         else if (token->flags & NAMED_OP)
1513           goto spell_ident;
1514         else
1515           spelling = TOKEN_NAME (token);
1516
1517         while ((c = *spelling++) != '\0')
1518           *buffer++ = c;
1519       }
1520       break;
1521
1522     case SPELL_CHAR:
1523       *buffer++ = token->val.c;
1524       break;
1525
1526     spell_ident:
1527     case SPELL_IDENT:
1528       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1529       buffer += NODE_LEN (token->val.node);
1530       break;
1531
1532     case SPELL_NUMBER:
1533       memcpy (buffer, token->val.str.text, token->val.str.len);
1534       buffer += token->val.str.len;
1535       break;
1536
1537     case SPELL_STRING:
1538       {
1539         int left, right, tag;
1540         switch (token->type)
1541           {
1542           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1543           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1544           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1545           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1546           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1547           default:
1548             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1549                        TOKEN_NAME (token));
1550             return buffer;
1551           }
1552         if (tag) *buffer++ = tag;
1553         *buffer++ = left;
1554         memcpy (buffer, token->val.str.text, token->val.str.len);
1555         buffer += token->val.str.len;
1556         *buffer++ = right;
1557       }
1558       break;
1559
1560     case SPELL_NONE:
1561       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1562       break;
1563     }
1564
1565   return buffer;
1566 }
1567
1568 /* Returns TOKEN spelt as a null-terminated string.  The string is
1569    freed when the reader is destroyed.  Useful for diagnostics.  */
1570 unsigned char *
1571 cpp_token_as_text (pfile, token)
1572      cpp_reader *pfile;
1573      const cpp_token *token;
1574 {
1575   unsigned int len = cpp_token_len (token);
1576   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1577
1578   end = cpp_spell_token (pfile, token, start);
1579   end[0] = '\0';
1580
1581   return start;
1582 }
1583
1584 /* Used by C front ends, which really should move to using
1585    cpp_token_as_text.  */
1586 const char *
1587 cpp_type2name (type)
1588      enum cpp_ttype type;
1589 {
1590   return (const char *) token_spellings[type].name;
1591 }
1592
1593 /* Writes the spelling of token to FP, without any preceding space.
1594    Separated from cpp_spell_token for efficiency - to avoid stdio
1595    double-buffering.  */
1596 void
1597 cpp_output_token (token, fp)
1598      const cpp_token *token;
1599      FILE *fp;
1600 {
1601   switch (TOKEN_SPELL (token))
1602     {
1603     case SPELL_OPERATOR:
1604       {
1605         const unsigned char *spelling;
1606         int c;
1607
1608         if (token->flags & DIGRAPH)
1609           spelling
1610             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1611         else if (token->flags & NAMED_OP)
1612           goto spell_ident;
1613         else
1614           spelling = TOKEN_NAME (token);
1615
1616         c = *spelling;
1617         do
1618           putc (c, fp);
1619         while ((c = *++spelling) != '\0');
1620       }
1621       break;
1622
1623     case SPELL_CHAR:
1624       putc (token->val.c, fp);
1625       break;
1626
1627     spell_ident:
1628     case SPELL_IDENT:
1629       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1630     break;
1631
1632     case SPELL_NUMBER:
1633       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1634       break;
1635
1636     case SPELL_STRING:
1637       {
1638         int left, right, tag;
1639         switch (token->type)
1640           {
1641           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1642           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1643           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1644           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1645           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1646           case CPP_ASM:         left = '\0'; right = '\0'; tag = '\0'; break;
1647           default:
1648             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1649             return;
1650           }
1651         if (tag) putc (tag, fp);
1652         if (left) putc (left, fp);
1653         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1654         if (right) putc (right, fp);
1655       }
1656       break;
1657
1658     case SPELL_NONE:
1659       /* An error, most probably.  */
1660       break;
1661     }
1662 }
1663
1664 /* Compare two tokens.  */
1665 int
1666 _cpp_equiv_tokens (a, b)
1667      const cpp_token *a, *b;
1668 {
1669   if (a->type == b->type && a->flags == b->flags)
1670     switch (TOKEN_SPELL (a))
1671       {
1672       default:                  /* Keep compiler happy.  */
1673       case SPELL_OPERATOR:
1674         return 1;
1675       case SPELL_CHAR:
1676         return a->val.c == b->val.c; /* Character.  */
1677       case SPELL_NONE:
1678         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1679       case SPELL_IDENT:
1680         return a->val.node == b->val.node;
1681       case SPELL_NUMBER:
1682       case SPELL_STRING:
1683         return (a->val.str.len == b->val.str.len
1684                 && !memcmp (a->val.str.text, b->val.str.text,
1685                             a->val.str.len));
1686       }
1687
1688   return 0;
1689 }
1690
1691 /* Returns nonzero if a space should be inserted to avoid an
1692    accidental token paste for output.  For simplicity, it is
1693    conservative, and occasionally advises a space where one is not
1694    needed, e.g. "." and ".2".  */
1695 int
1696 cpp_avoid_paste (pfile, token1, token2)
1697      cpp_reader *pfile;
1698      const cpp_token *token1, *token2;
1699 {
1700   enum cpp_ttype a = token1->type, b = token2->type;
1701   cppchar_t c;
1702
1703   if (token1->flags & NAMED_OP)
1704     a = CPP_NAME;
1705   if (token2->flags & NAMED_OP)
1706     b = CPP_NAME;
1707
1708   c = EOF;
1709   if (token2->flags & DIGRAPH)
1710     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1711   else if (token_spellings[b].category == SPELL_OPERATOR)
1712     c = token_spellings[b].name[0];
1713
1714   /* Quickly get everything that can paste with an '='.  */
1715   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1716     return 1;
1717
1718   switch (a)
1719     {
1720     case CPP_GREATER:   return c == '>' || c == '?';
1721     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1722     case CPP_PLUS:      return c == '+';
1723     case CPP_MINUS:     return c == '-' || c == '>';
1724     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1725     case CPP_MOD:       return c == ':' || c == '>';
1726     case CPP_AND:       return c == '&';
1727     case CPP_OR:        return c == '|';
1728     case CPP_COLON:     return c == ':' || c == '>';
1729     case CPP_DEREF:     return c == '*';
1730     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1731     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1732     case CPP_NAME:      return ((b == CPP_NUMBER
1733                                  && name_p (pfile, &token2->val.str))
1734                                 || b == CPP_NAME
1735                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1736     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1737                                 || c == '.' || c == '+' || c == '-');
1738     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1739                                 && token1->val.c == '@'
1740                                 && (b == CPP_NAME || b == CPP_STRING));
1741     default:            break;
1742     }
1743
1744   return 0;
1745 }
1746
1747 /* Output all the remaining tokens on the current line, and a newline
1748    character, to FP.  Leading whitespace is removed.  If there are
1749    macros, special token padding is not performed.  */
1750 void
1751 cpp_output_line (pfile, fp)
1752      cpp_reader *pfile;
1753      FILE *fp;
1754 {
1755   const cpp_token *token;
1756
1757   token = cpp_get_token (pfile);
1758   while (token->type != CPP_EOF)
1759     {
1760       cpp_output_token (token, fp);
1761       token = cpp_get_token (pfile);
1762       if (token->flags & PREV_WHITE)
1763         putc (' ', fp);
1764     }
1765
1766   putc ('\n', fp);
1767 }
1768
/* Returns the numeric value of the hexadecimal digit C.  Aborts if
   C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1779
1780 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1781    failure if cpplib is not parsing C++ or C99.  Such failure is
1782    silent, and no variables are updated.  Otherwise returns 0, and
1783    warns if -Wtraditional.
1784
1785    [lex.charset]: The character designated by the universal character
1786    name \UNNNNNNNN is that character whose character short name in
1787    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1788    universal character name \uNNNN is that character whose character
1789    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1790    for a universal character name is less than 0x20 or in the range
1791    0x7F-0x9F (inclusive), or if the universal character name
1792    designates a character in the basic source character set, then the
1793    program is ill-formed.
1794
1795    We assume that wchar_t is Unicode, so we don't need to do any
1796    mapping.  Is this ever wrong?
1797
1798    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1799    LIMIT is the end of the string or charconst.  PSTR is updated to
1800    point after the UCS on return, and the UCS is written into PC.  */
1801
1802 static int
1803 maybe_read_ucs (pfile, pstr, limit, pc)
1804      cpp_reader *pfile;
1805      const unsigned char **pstr;
1806      const unsigned char *limit;
1807      cppchar_t *pc;
1808 {
1809   const unsigned char *p = *pstr;
1810   unsigned int code = 0;
1811   unsigned int c = *pc, length;
1812
1813   /* Only attempt to interpret a UCS for C++ and C99.  */
1814   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1815     return 1;
1816
1817   if (CPP_WTRADITIONAL (pfile))
1818     cpp_error (pfile, DL_WARNING,
1819                "the meaning of '\\%c' is different in traditional C", c);
1820
1821   length = (c == 'u' ? 4: 8);
1822
1823   if ((size_t) (limit - p) < length)
1824     {
1825       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1826       /* Skip to the end to avoid more diagnostics.  */
1827       p = limit;
1828     }
1829   else
1830     {
1831       for (; length; length--, p++)
1832         {
1833           c = *p;
1834           if (ISXDIGIT (c))
1835             code = (code << 4) + hex_digit_value (c);
1836           else
1837             {
1838               cpp_error (pfile, DL_ERROR,
1839                          "non-hex digit '%c' in universal-character-name", c);
1840               /* We shouldn't skip in case there are multibyte chars.  */
1841               break;
1842             }
1843         }
1844     }
1845
1846 #ifdef TARGET_EBCDIC
1847   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1848   code = 0x3f;  /* EBCDIC invalid character */
1849 #else
1850  /* True extended characters are OK.  */
1851   if (code >= 0xa0
1852       && !(code & 0x80000000)
1853       && !(code >= 0xD800 && code <= 0xDFFF))
1854     ;
1855   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1856      hex escapes so that this also works with EBCDIC hosts.  */
1857   else if (code == 0x24 || code == 0x40 || code == 0x60)
1858     ;
1859   /* Don't give another error if one occurred above.  */
1860   else if (length == 0)
1861     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1862 #endif
1863
1864   *pstr = p;
1865   *pc = code;
1866   return 0;
1867 }
1868
1869 /* Returns the value of an escape sequence, truncated to the correct
1870    target precision.  PSTR points to the input pointer, which is just
1871    after the backslash.  LIMIT is how much text we have.  WIDE is true
1872    if the escape sequence is part of a wide character constant or
1873    string literal.  Handles all relevant diagnostics.  */
1874 cppchar_t
1875 cpp_parse_escape (pfile, pstr, limit, wide)
1876      cpp_reader *pfile;
1877      const unsigned char **pstr;
1878      const unsigned char *limit;
1879      int wide;
1880 {
1881   int unknown = 0;
1882   const unsigned char *str = *pstr;
1883   cppchar_t c, mask;
1884   unsigned int width;
1885
1886   if (wide)
1887     width = CPP_OPTION (pfile, wchar_precision);
1888   else
1889     width = CPP_OPTION (pfile, char_precision);
1890   if (width < BITS_PER_CPPCHAR_T)
1891     mask = ((cppchar_t) 1 << width) - 1;
1892   else
1893     mask = ~0;
1894
1895   c = *str++;
1896   switch (c)
1897     {
1898     case '\\': case '\'': case '"': case '?': break;
1899     case 'b': c = TARGET_BS;      break;
1900     case 'f': c = TARGET_FF;      break;
1901     case 'n': c = TARGET_NEWLINE; break;
1902     case 'r': c = TARGET_CR;      break;
1903     case 't': c = TARGET_TAB;     break;
1904     case 'v': c = TARGET_VT;      break;
1905
1906     case '(': case '{': case '[': case '%':
1907       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1908          '\%' is used to prevent SCCS from getting confused.  */
1909       unknown = CPP_PEDANTIC (pfile);
1910       break;
1911
1912     case 'a':
1913       if (CPP_WTRADITIONAL (pfile))
1914         cpp_error (pfile, DL_WARNING,
1915                    "the meaning of '\\a' is different in traditional C");
1916       c = TARGET_BELL;
1917       break;
1918
1919     case 'e': case 'E':
1920       if (CPP_PEDANTIC (pfile))
1921         cpp_error (pfile, DL_PEDWARN,
1922                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1923       c = TARGET_ESC;
1924       break;
1925
1926     case 'u': case 'U':
1927       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1928       break;
1929
1930     case 'x':
1931       if (CPP_WTRADITIONAL (pfile))
1932         cpp_error (pfile, DL_WARNING,
1933                    "the meaning of '\\x' is different in traditional C");
1934
1935       {
1936         cppchar_t i = 0, overflow = 0;
1937         int digits_found = 0;
1938
1939         while (str < limit)
1940           {
1941             c = *str;
1942             if (! ISXDIGIT (c))
1943               break;
1944             str++;
1945             overflow |= i ^ (i << 4 >> 4);
1946             i = (i << 4) + hex_digit_value (c);
1947             digits_found = 1;
1948           }
1949
1950         if (!digits_found)
1951           cpp_error (pfile, DL_ERROR,
1952                        "\\x used with no following hex digits");
1953
1954         if (overflow | (i != (i & mask)))
1955           {
1956             cpp_error (pfile, DL_PEDWARN,
1957                        "hex escape sequence out of range");
1958             i &= mask;
1959           }
1960         c = i;
1961       }
1962       break;
1963
1964     case '0':  case '1':  case '2':  case '3':
1965     case '4':  case '5':  case '6':  case '7':
1966       {
1967         size_t count = 0;
1968         cppchar_t i = c - '0';
1969
1970         while (str < limit && ++count < 3)
1971           {
1972             c = *str;
1973             if (c < '0' || c > '7')
1974               break;
1975             str++;
1976             i = (i << 3) + c - '0';
1977           }
1978
1979         if (i != (i & mask))
1980           {
1981             cpp_error (pfile, DL_PEDWARN,
1982                        "octal escape sequence out of range");
1983             i &= mask;
1984           }
1985         c = i;
1986       }
1987       break;
1988
1989     default:
1990       unknown = 1;
1991       break;
1992     }
1993
1994   if (unknown)
1995     {
1996       if (ISGRAPH (c))
1997         cpp_error (pfile, DL_PEDWARN,
1998                    "unknown escape sequence '\\%c'", (int) c);
1999       else
2000         cpp_error (pfile, DL_PEDWARN,
2001                    "unknown escape sequence: '\\%03o'", (int) c);
2002     }
2003
2004   if (c > mask)
2005     {
2006       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2007       c &= mask;
2008     }
2009
2010   *pstr = str;
2011   return c;
2012 }
2013
2014 /* Interpret a (possibly wide) character constant in TOKEN.
2015    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
2016    points to a variable that is filled in with the number of
2017    characters seen, and UNSIGNEDP to a variable that indicates whether
2018    the result has signed type.  */
2019 cppchar_t
2020 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2021      cpp_reader *pfile;
2022      const cpp_token *token;
2023      unsigned int *pchars_seen;
2024      int *unsignedp;
2025 {
2026   const unsigned char *str = token->val.str.text;
2027   const unsigned char *limit = str + token->val.str.len;
2028   unsigned int chars_seen = 0;
2029   size_t width, max_chars;
2030   cppchar_t c, mask, result = 0;
2031   bool unsigned_p;
2032
2033 #ifdef MULTIBYTE_CHARS
2034   (void) local_mbtowc (NULL, NULL, 0);
2035 #endif
2036
2037   /* Width in bits.  */
2038   if (token->type == CPP_CHAR)
2039     {
2040       width = CPP_OPTION (pfile, char_precision);
2041       max_chars = CPP_OPTION (pfile, int_precision) / width;
2042       unsigned_p = CPP_OPTION (pfile, unsigned_char);
2043     }
2044   else
2045     {
2046       width = CPP_OPTION (pfile, wchar_precision);
2047       max_chars = 1;
2048       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
2049     }
2050
2051   if (width < BITS_PER_CPPCHAR_T)
2052     mask = ((cppchar_t) 1 << width) - 1;
2053   else
2054     mask = ~0;
2055
2056   while (str < limit)
2057     {
2058 #ifdef MULTIBYTE_CHARS
2059       wchar_t wc;
2060       int char_len;
2061
2062       char_len = local_mbtowc (&wc, str, limit - str);
2063       if (char_len == -1)
2064         {
2065           cpp_error (pfile, DL_WARNING,
2066                      "ignoring invalid multibyte character");
2067           c = *str++;
2068         }
2069       else
2070         {
2071           str += char_len;
2072           c = wc;
2073         }
2074 #else
2075       c = *str++;
2076 #endif
2077
2078       if (c == '\\')
2079         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2080
2081 #ifdef MAP_CHARACTER
2082       if (ISPRINT (c))
2083         c = MAP_CHARACTER (c);
2084 #endif
2085
2086       chars_seen++;
2087
2088       /* Truncate the character, scale the result and merge the two.  */
2089       c &= mask;
2090       if (width < BITS_PER_CPPCHAR_T)
2091         result = (result << width) | c;
2092       else
2093         result = c;
2094     }
2095
2096   if (chars_seen == 0)
2097     cpp_error (pfile, DL_ERROR, "empty character constant");
2098   else if (chars_seen > 1)
2099     {
2100       /* Multichar charconsts are of type int and therefore signed.  */
2101       unsigned_p = 0;
2102
2103       if (chars_seen > max_chars)
2104         {
2105           chars_seen = max_chars;
2106           cpp_error (pfile, DL_WARNING,
2107                      "character constant too long for its type");
2108         }
2109       else if (CPP_OPTION (pfile, warn_multichar))
2110         cpp_error (pfile, DL_WARNING, "multi-character character constant");
2111     }
2112
2113   /* Sign-extend or truncate the constant to cppchar_t.  The value is
2114      in WIDTH bits, but for multi-char charconsts it's value is the
2115      full target type's width.  */
2116   if (chars_seen > 1)
2117     width *= max_chars;
2118   if (width < BITS_PER_CPPCHAR_T)
2119     {
2120       mask = ((cppchar_t) 1 << width) - 1;
2121       if (unsigned_p || !(result & (1 << (width - 1))))
2122         result &= mask;
2123       else
2124         result |= ~mask;
2125     }
2126
2127   *pchars_seen = chars_seen;
2128   *unsignedp = unsigned_p;
2129   return result;
2130 }
2131
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the span from BUFF's cur to
   its limit.  Every macro argument is parenthesized so that an
   expression argument expands as a single operand.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2146
2147 /* Create a new allocation buffer.  Place the control block at the end
2148    of the buffer, so that buffer overflows will cause immediate chaos.  */
2149 static _cpp_buff *
2150 new_buff (len)
2151      size_t len;
2152 {
2153   _cpp_buff *result;
2154   unsigned char *base;
2155
2156   if (len < MIN_BUFF_SIZE)
2157     len = MIN_BUFF_SIZE;
2158   len = CPP_ALIGN (len);
2159
2160   base = xmalloc (len + sizeof (_cpp_buff));
2161   result = (_cpp_buff *) (base + len);
2162   result->base = base;
2163   result->cur = base;
2164   result->limit = base + len;
2165   result->next = NULL;
2166   return result;
2167 }
2168
2169 /* Place a chain of unwanted allocation buffers on the free list.  */
2170 void
2171 _cpp_release_buff (pfile, buff)
2172      cpp_reader *pfile;
2173      _cpp_buff *buff;
2174 {
2175   _cpp_buff *end = buff;
2176
2177   while (end->next)
2178     end = end->next;
2179   end->next = pfile->free_buffs;
2180   pfile->free_buffs = buff;
2181 }
2182
2183 /* Return a free buffer of size at least MIN_SIZE.  */
2184 _cpp_buff *
2185 _cpp_get_buff (pfile, min_size)
2186      cpp_reader *pfile;
2187      size_t min_size;
2188 {
2189   _cpp_buff *result, **p;
2190
2191   for (p = &pfile->free_buffs;; p = &(*p)->next)
2192     {
2193       size_t size;
2194
2195       if (*p == NULL)
2196         return new_buff (min_size);
2197       result = *p;
2198       size = result->limit - result->base;
2199       /* Return a buffer that's big enough, but don't waste one that's
2200          way too big.  */
2201       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2202         break;
2203     }
2204
2205   *p = result->next;
2206   result->next = NULL;
2207   result->cur = result->base;
2208   return result;
2209 }
2210
2211 /* Creates a new buffer with enough space to hold the uncommitted
2212    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2213    the excess bytes to the new buffer.  Chains the new buffer after
2214    BUFF, and returns the new buffer.  */
2215 _cpp_buff *
2216 _cpp_append_extend_buff (pfile, buff, min_extra)
2217      cpp_reader *pfile;
2218      _cpp_buff *buff;
2219      size_t min_extra;
2220 {
2221   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2222   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2223
2224   buff->next = new_buff;
2225   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2226   return new_buff;
2227 }
2228
2229 /* Creates a new buffer with enough space to hold the uncommitted
2230    remaining bytes of the buffer pointed to by BUFF, and at least
2231    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2232    Chains the new buffer before the buffer pointed to by BUFF, and
2233    updates the pointer to point to the new buffer.  */
2234 void
2235 _cpp_extend_buff (pfile, pbuff, min_extra)
2236      cpp_reader *pfile;
2237      _cpp_buff **pbuff;
2238      size_t min_extra;
2239 {
2240   _cpp_buff *new_buff, *old_buff = *pbuff;
2241   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2242
2243   new_buff = _cpp_get_buff (pfile, size);
2244   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2245   new_buff->next = old_buff;
2246   *pbuff = new_buff;
2247 }
2248
2249 /* Free a chain of buffers starting at BUFF.  */
2250 void
2251 _cpp_free_buff (buff)
2252      _cpp_buff *buff;
2253 {
2254   _cpp_buff *next;
2255
2256   for (; buff; buff = next)
2257     {
2258       next = buff->next;
2259       free (buff->base);
2260     }
2261 }
2262
2263 /* Allocate permanent, unaligned storage of length LEN.  */
2264 unsigned char *
2265 _cpp_unaligned_alloc (pfile, len)
2266      cpp_reader *pfile;
2267      size_t len;
2268 {
2269   _cpp_buff *buff = pfile->u_buff;
2270   unsigned char *result = buff->cur;
2271
2272   if (len > (size_t) (buff->limit - result))
2273     {
2274       buff = _cpp_get_buff (pfile, len);
2275       buff->next = pfile->u_buff;
2276       pfile->u_buff = buff;
2277       result = buff->cur;
2278     }
2279
2280   buff->cur = result + len;
2281   return result;
2282 }
2283
2284 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2285    That buffer is used for growing allocations when saving macro
2286    replacement lists in a #define, and when parsing an answer to an
2287    assertion in #assert, #unassert or #if (and therefore possibly
2288    whilst expanding macros).  It therefore must not be used by any
2289    code that they might call: specifically the lexer and the guts of
2290    the macro expander.
2291
2292    All existing other uses clearly fit this restriction: storing
2293    registered pragmas during initialization.  */
2294 unsigned char *
2295 _cpp_aligned_alloc (pfile, len)
2296      cpp_reader *pfile;
2297      size_t len;
2298 {
2299   _cpp_buff *buff = pfile->a_buff;
2300   unsigned char *result = buff->cur;
2301
2302   if (len > (size_t) (buff->limit - result))
2303     {
2304       buff = _cpp_get_buff (pfile, len);
2305       buff->next = pfile->a_buff;
2306       pfile->a_buff = buff;
2307       result = buff->cur;
2308     }
2309
2310   buff->cur = result + len;
2311   return result;
2312 }