git.gag.com Git - fw/sdcc/blob - support/cpp2/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27 #include <assert.h>
  28
  29 #ifdef MULTIBYTE_CHARS
  30 #include "mbchar.h"
  31 #include <locale.h>
  32 #endif
  33
  34 /* Category of spelling recorded for each token type; it is stored per
  35    type in token_spellings[] and read back through TOKEN_SPELL.
     Tokens with SPELL_STRING store their spelling in the token list,
     together with its length.  (The historical wording of this note
     names token->val.name.len; parse_string in this file fills in
     token->val.str.text and token->val.str.len.)  */
  36 enum spell_type
  37 {
  38   SPELL_OPERATOR = 0,  /* Category given to every OP() table row.  */
  39   SPELL_CHAR,
  40   SPELL_IDENT,
  41   SPELL_NUMBER,
  42   SPELL_STRING,        /* Spelling kept with the token itself.  */
  43   SPELL_NONE
  44 };
  45
     /* One row of token_spellings[]: how a token type is spelled.  */
  46 struct token_spelling
  47 {
  48   enum spell_type category;    /* Spelling category of the type.  */
  49   const unsigned char *name;   /* OP() rows: the operator text.
                                        TK() rows: the stringified
                                        enumerator name.  */
  50 };
  51
     /* Digraph spellings.  The code that indexes this table is not in
        the visible part of the file, so the order of the entries must
        presumably match whatever index the lexer computes -- confirm
        before reordering.  */
  52 static const unsigned char *const digraph_spellings[] =
  53 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  54
     /* TTYPE_TABLE is expanded with the OP and TK definitions below to
        build token_spellings[], which is indexed by token type (see
        TOKEN_SPELL).  OP rows are always SPELL_OPERATOR and use S as
        the spelling; TK rows take their category from S and use the
        stringified enumerator E as the name.  TTYPE_TABLE, N_TTYPES, U
        and STRINGX come from outside the visible part of this file.  */
  55 #define OP(e, s) { SPELL_OPERATOR, U s           },
  56 #define TK(e, s) { s,              U STRINGX (e) },
  57 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  58 #undef OP
  59 #undef TK
  60
     /* Spelling category and name of TOKEN, looked up by its type.  */
  61 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  62 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Rewind the read pointer to the saved backup position.  Expands
        to code that uses a variable literally named `buffer', so one
        must be in scope wherever this macro is used.  */
  63 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  64
     /* Forward declarations of the file-local helpers.  PARAMS wraps the
        parameter lists so the prototypes can be dropped for compilers
        that lack them -- presumably; PARAMS is defined elsewhere.  */
     /* Character-level input: newlines, trigraphs, escaped newlines.  */
  65 static void handle_newline PARAMS ((cpp_reader *));
  66 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  67 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  68
     /* Skipping of _asm blocks, comments and horizontal whitespace.  */
  69 static int skip_asm_block PARAMS ((cpp_reader *));
  70 static int skip_block_comment PARAMS ((cpp_reader *));
  71 static int skip_line_comment PARAMS ((cpp_reader *));
  72 static void adjust_column PARAMS ((cpp_reader *));
  73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
     /* Identifier, number and string lexing.  */
  74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  76                                   unsigned int *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  80 static bool trigraph_p PARAMS ((cpp_reader *));
     /* Saving the text of _asm blocks and comments into tokens.  */
  81 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
  82 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  83 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  84                                   cppchar_t));
     /* The definitions of continue_after_nul, maybe_read_ucs,
        hex_digit_value and new_buff are not in the visible part of
        this file.  */
  85 static bool continue_after_nul PARAMS ((cpp_reader *));
  86 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  87 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  88                                    const unsigned char *, cppchar_t *));
  89 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  90
  91 static unsigned int hex_digit_value PARAMS ((unsigned int));
  92 static _cpp_buff *new_buff PARAMS ((size_t));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98 int
  99 cpp_ideq (token, string)
 100      const cpp_token *token;
 101      const char *string;
 102 {
 103   if (token->type != CPP_NAME)
 104     return 0;
 105
 106   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 107 }
 108
 109 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 110    Returns with buffer->cur pointing to the character immediately
 111    following the newline (combination).  */
 112 static void
 113 handle_newline (pfile)
 114      cpp_reader *pfile;
 115 {
 116   cpp_buffer *buffer = pfile->buffer;
 117
 118   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 119      only accept CR-LF; maybe we should fall back to that behavior?  */
 120   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 121     buffer->cur++;
 122
 123   buffer->line_base = buffer->cur;
 124   buffer->col_adjust = 0;
 125   pfile->line++;
 126 }
 127
 128 /* Subroutine of skip_escaped_newlines; called when a 3-character
 129    sequence beginning with "??" is encountered.  buffer->cur points to
 130    the second '?'.
 131
 132    Warn if necessary, and returns true if the sequence forms a
 133    trigraph and the trigraph should be honored.  */
 134 static bool
 135 trigraph_p (pfile)
 136      cpp_reader *pfile;
 137 {
 138   cpp_buffer *buffer = pfile->buffer;
 139   cppchar_t from_char = buffer->cur[1];
 140   bool accept;
 141
 142   if (!_cpp_trigraph_map[from_char])
 143     return false;
 144
 145   accept = CPP_OPTION (pfile, trigraphs);
 146
 147   /* Don't warn about trigraphs in comments.  */
 148   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 149     {
 150       if (accept)
 151         cpp_error_with_line (pfile, DL_WARNING,
 152                              pfile->line, CPP_BUF_COL (buffer) - 1,
 153                              "trigraph ??%c converted to %c",
 154                              (int) from_char,
 155                              (int) _cpp_trigraph_map[from_char]);
 156       else if (buffer->cur != buffer->last_Wtrigraphs)
 157         {
 158           buffer->last_Wtrigraphs = buffer->cur;
 159           cpp_error_with_line (pfile, DL_WARNING,
 160                                pfile->line, CPP_BUF_COL (buffer) - 1,
 161                                "trigraph ??%c ignored", (int) from_char);
 162         }
 163     }
 164
 165   return accept;
 166 }
 167
 168 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 169    lie in buffer->cur[-1].  Returns the next byte, which will be in
 170    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 171    2 of the ISO C standard.  */
 172 static cppchar_t
 173 skip_escaped_newlines (pfile)
 174      cpp_reader *pfile;
 175 {
 176   cpp_buffer *buffer = pfile->buffer;
 177   cppchar_t next = buffer->cur[-1];
 178
 179   /* Only do this if we apply stages 1 and 2.  */
 180   if (!buffer->from_stage3)
 181     {
 182       const unsigned char *saved_cur;
 183       cppchar_t next1;
 184
 185       do
 186         {
 187           if (next == '?')
 188             {
 189               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 190                 break;
 191
 192               /* Translate the trigraph.  */
 193               next = _cpp_trigraph_map[buffer->cur[1]];
 194               buffer->cur += 2;
 195               if (next != '\\')
 196                 break;
 197             }
 198
 199           if (buffer->cur == buffer->rlimit)
 200             break;
 201
 202           /* We have a backslash, and room for at least one more
 203              character.  Skip horizontal whitespace.  */
 204           saved_cur = buffer->cur;
 205           do
 206             next1 = *buffer->cur++;
 207           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 208
 209           if (!is_vspace (next1))
 210             {
 211               buffer->cur = saved_cur;
 212               break;
 213             }
 214
 215           if (saved_cur != buffer->cur - 1
 216               && !pfile->state.lexing_comment)
 217             cpp_error (pfile, DL_WARNING,
 218                        "backslash and newline separated by space");
 219
 220           handle_newline (pfile);
 221           buffer->backup_to = buffer->cur;
 222           if (buffer->cur == buffer->rlimit)
 223             {
 224               cpp_error (pfile, DL_PEDWARN,
 225                          "backslash-newline at end of file");
 226               next = EOF;
 227             }
 228           else
 229             next = *buffer->cur++;
 230         }
 231       while (next == '\\' || next == '?');
 232     }
 233
 234   return next;
 235 }
 236
 237 /* Obtain the next character, after trigraph conversion and skipping
 238    an arbitrarily long string of escaped newlines.  The common case of
 239    no trigraphs or escaped newlines falls through quickly.  On return,
 240    buffer->backup_to points to where to return to if the character is
 241    not to be processed.  */
 242 static cppchar_t
 243 get_effective_char (pfile)
 244      cpp_reader *pfile;
 245 {
 246   cppchar_t next;
 247   cpp_buffer *buffer = pfile->buffer;
 248
 249   buffer->backup_to = buffer->cur;
 250   next = *buffer->cur++;
 251   if (__builtin_expect (next == '?' || next == '\\', 0))
 252     next = skip_escaped_newlines (pfile);
 253
 254   return next;
 255 }
 256
 257 /* SDCC _asm specific */
 258 /* Skip an _asm ... _endasm block.  We find the end of the comment by
 259    seeing _endasm.  Returns non-zero if _asm terminated by EOF, zero
 260    otherwise.  */
 261 static int
 262 skip_asm_block (pfile)
 263      cpp_reader *pfile;
 264 {
 265 #define _ENDASM_STR "endasm"
 266 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
 267
 268   cpp_buffer *buffer = pfile->buffer;
 269   cppchar_t c = EOF;
 270   int prev_space = 0;
 271   int ret = 1;
 272
 273   pfile->state.lexing_comment = 1;
 274   while (buffer->cur != buffer->rlimit)
 275     {
 276       prev_space = is_space(c);
 277       c = *buffer->cur++;
 278
 279       /* FIXME: For speed, create a new character class of characters
 280          of interest inside block comments.  */
 281       if (c == '?' || c == '\\')
 282         c = skip_escaped_newlines (pfile);
 283
 284       if (prev_space && c == '_')
 285         {
 286           if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
 287             strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
 288             {
 289               buffer->cur += _ENDASM_LEN;
 290               ret = 0;
 291               break;
 292             }
 293         }
 294       else if (is_vspace (c))
 295         {
 296           prev_space = is_space(c);
 297           handle_newline (pfile);
 298         }
 299       else if (c == '\t')
 300         adjust_column (pfile);
 301     }
 302
 303   pfile->state.lexing_comment = 0;
 304   return ret;
 305 }
 306
 307 /* Skip a C-style block comment.  We find the end of the comment by
 308    seeing if an asterisk is before every '/' we encounter.  Returns
 309    nonzero if comment terminated by EOF, zero otherwise.  */
 310 static int
 311 skip_block_comment (pfile)
 312      cpp_reader *pfile;
 313 {
 314   cpp_buffer *buffer = pfile->buffer;
 315   cppchar_t c = EOF, prevc = EOF;
 316
 317   pfile->state.lexing_comment = 1;
 318   while (buffer->cur != buffer->rlimit)
 319     {
 320       prevc = c, c = *buffer->cur++;
 321
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (pfile);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimiter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 339             cpp_error_with_line (pfile, DL_WARNING,
 340                                  pfile->line, CPP_BUF_COL (buffer),
 341                                  "\"/*\" within comment");
 342         }
 343       else if (is_vspace (c))
 344         handle_newline (pfile);
 345       else if (c == '\t')
 346         adjust_column (pfile);
 347     }
 348
 349   pfile->state.lexing_comment = 0;
 350   return c != '/' || prevc != '*';
 351 }
 352
 353 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 354    terminating newline.  Handles escaped newlines.  Returns nonzero
 355    if a multiline comment.  */
 356 static int
 357 skip_line_comment (pfile)
 358      cpp_reader *pfile;
 359 {
 360   cpp_buffer *buffer = pfile->buffer;
 361   unsigned int orig_line = pfile->line;
 362   cppchar_t c;
 363 #ifdef MULTIBYTE_CHARS
 364   wchar_t wc;
 365   int char_len;
 366 #endif
 367
 368   pfile->state.lexing_comment = 1;
 369 #ifdef MULTIBYTE_CHARS
 370   /* Reset multibyte conversion state.  */
 371   (void) local_mbtowc (NULL, NULL, 0);
 372 #endif
 373   do
 374     {
 375       if (buffer->cur == buffer->rlimit)
 376         goto at_eof;
 377
 378 #ifdef MULTIBYTE_CHARS
 379       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 380                                buffer->rlimit - buffer->cur);
 381       if (char_len == -1)
 382         {
 383           cpp_error (pfile, DL_WARNING,
 384                      "ignoring invalid multibyte character");
 385           char_len = 1;
 386           c = *buffer->cur++;
 387         }
 388       else
 389         {
 390           buffer->cur += char_len;
 391           c = wc;
 392         }
 393 #else
 394       c = *buffer->cur++;
 395 #endif
 396       if (c == '?' || c == '\\')
 397         c = skip_escaped_newlines (pfile);
 398     }
 399   while (!is_vspace (c));
 400
 401   /* Step back over the newline, except at EOF.  */
 402   buffer->cur--;
 403  at_eof:
 404
 405   pfile->state.lexing_comment = 0;
 406   return orig_line != pfile->line;
 407 }
 408
 409 /* pfile->buffer->cur is one beyond the \t character.  Update
 410    col_adjust so we track the column correctly.  */
 411 static void
 412 adjust_column (pfile)
 413      cpp_reader *pfile;
 414 {
 415   cpp_buffer *buffer = pfile->buffer;
 416   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 417
 418   /* Round it up to multiple of the tabstop, but subtract 1 since the
 419      tab itself occupies a character position.  */
 420   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 421                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 422 }
 423
 424 /* Skips whitespace, saving the next non-whitespace character.
 425    Adjusts pfile->col_adjust to account for tabs.  Without this,
 426    tokens might be assigned an incorrect column.  */
 427 static int
 428 skip_whitespace (pfile, c)
 429      cpp_reader *pfile;
 430      cppchar_t c;
 431 {
 432   cpp_buffer *buffer = pfile->buffer;
 433   unsigned int warned = 0;
 434
 435   do
 436     {
 437       /* Horizontal space always OK.  */
 438       if (c == ' ')
 439         ;
 440       else if (c == '\t')
 441         adjust_column (pfile);
 442       /* Just \f \v or \0 left.  */
 443       else if (c == '\0')
 444         {
 445           if (buffer->cur - 1 == buffer->rlimit)
 446             return 0;
 447           if (!warned)
 448             {
 449               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 450               warned = 1;
 451             }
 452         }
 453       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 454         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 455                              CPP_BUF_COL (buffer),
 456                              "%s in preprocessing directive",
 457                              c == '\f' ? "form feed" : "vertical tab");
 458
 459       c = *buffer->cur++;
 460     }
 461   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 462   while (is_nvspace (c));
 463
 464   buffer->cur--;
 465   return 1;
 466 }
 467
 468 /* See if the characters of a number token are valid in a name (no
 469    '.', '+' or '-').  */
 470 static int
 471 name_p (pfile, string)
 472      cpp_reader *pfile;
 473      const cpp_string *string;
 474 {
 475   unsigned int i;
 476
 477   for (i = 0; i < string->len; i++)
 478     if (!is_idchar (string->text[i]))
 479       return 0;
 480
 481   return 1;
 482 }
 483
 484 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 485    a critical inner loop.  The common case is an identifier which has
 486    not been split by backslash-newline, does not contain a dollar
 487    sign, and has already been scanned (roughly 10:1 ratio of
 488    seen:unseen identifiers in normal code; the distribution is
 489    Poisson-like).  Second most common case is a new identifier, not
 490    split and no dollar sign.  The other possibilities are rare and
 491    have been relegated to parse_slow.  */
 492 static cpp_hashnode *
 493 parse_identifier (pfile)
 494      cpp_reader *pfile;
 495 {
 496   cpp_hashnode *result;
 497   const uchar *cur, *base;
 498
 499   /* Fast-path loop.  Skim over a normal identifier.
 500      N.B. ISIDNUM does not include $.  */
 501   cur = pfile->buffer->cur;
 502   while (ISIDNUM (*cur))
 503     cur++;
 504
 505   /* Check for slow-path cases.  */
 506   if (*cur == '?' || *cur == '\\' || *cur == '$')
 507     {
 508       unsigned int len;
 509
 510       base = parse_slow (pfile, cur, 0, &len);
 511       result = (cpp_hashnode *)
 512         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 513     }
 514   else
 515     {
 516       base = pfile->buffer->cur - 1;
 517       pfile->buffer->cur = cur;
 518       result = (cpp_hashnode *)
 519         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 520     }
 521
 522   /* Rarely, identifiers require diagnostics when lexed.
 523      XXX Has to be forced out of the fast path.  */
 524   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 525                         && !pfile->state.skipping, 0))
 526     {
 527       /* It is allowed to poison the same identifier twice.  */
 528       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 529         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 530                    NODE_NAME (result));
 531
 532       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 533          replacement list of a variadic macro.  */
 534       if (result == pfile->spec_nodes.n__VA_ARGS__
 535           && !pfile->state.va_args_ok)
 536         cpp_error (pfile, DL_PEDWARN,
 537         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 538     }
 539
 540   return result;
 541 }
 542
 543 /* Slow path.  This handles numbers and identifiers which have been
 544    split, or contain dollar signs.  The part of the token from
 545    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 546    1 if it's a number, and 2 if it has a leading period.  Returns a
 547    pointer to the token's NUL-terminated spelling in permanent
 548    storage, and sets PLEN to its length.  */
 549 static uchar *
 550 parse_slow (pfile, cur, number_p, plen)
 551      cpp_reader *pfile;
 552      const uchar *cur;
 553      int number_p;
 554      unsigned int *plen;
 555 {
 556   cpp_buffer *buffer = pfile->buffer;
 557   const uchar *base = buffer->cur - 1;
 558   struct obstack *stack = &pfile->hash_table->stack;
 559   unsigned int c, prevc, saw_dollar = 0;
 560
 561   /* Place any leading period.  */
 562   if (number_p == 2)
 563     obstack_1grow (stack, '.');
 564
 565   /* Copy the part of the token which is known to be okay.  */
 566   obstack_grow (stack, base, cur - base);
 567
 568   /* Now process the part which isn't.  We are looking at one of
 569      '$', '\\', or '?' on entry to this loop.  */
 570   prevc = cur[-1];
 571   c = *cur++;
 572   buffer->cur = cur;
 573   for (;;)
 574     {
 575       /* Potential escaped newline?  */
 576       buffer->backup_to = buffer->cur - 1;
 577       if (c == '?' || c == '\\')
 578         c = skip_escaped_newlines (pfile);
 579
 580       if (number_p)
 581         {
 582           if (!ISXDIGIT (c) && c != '.' && !VALID_SIGN (c, prevc) && !VALID_HEX (c, prevc))
 583             break;
 584
 585             obstack_1grow (stack, c);
 586
 587             base = cur = buffer->cur;
 588             while (ISXDIGIT (*cur))
 589               ++cur;
 590
 591             if (cur != base)
 592               obstack_grow (stack, base, cur - base);
 593
 594             prevc = cur[-1];
 595             c = *cur++;
 596             buffer->cur = cur;
 597         }
 598       else
 599         {
 600           if (!is_idchar (c))
 601             break;
 602
 603           /* Handle normal identifier characters in this loop.  */
 604           do
 605             {
 606               prevc = c;
 607               obstack_1grow (stack, c);
 608
 609               if (c == '$')
 610                 saw_dollar++;
 611
 612               c = *buffer->cur++;
 613             }
 614           while (is_idchar (c));
 615         }
 616     }
 617
 618   /* Step back over the unwanted char.  */
 619   BACKUP ();
 620
 621   /* $ is not an identifier character in the standard, but is commonly
 622      accepted as an extension.  Don't warn about it in skipped
 623      conditional blocks.  */
 624   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 625     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 626
 627   /* Identifiers and numbers are null-terminated.  */
 628   *plen = obstack_object_size (stack);
 629   obstack_1grow (stack, '\0');
 630   return obstack_finish (stack);
 631 }
 632
 633 /* Parse a number, beginning with character C, skipping embedded
 634    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 635    before C.  Place the result in NUMBER.  */
 636 static void
 637 parse_number (pfile, number, leading_period)
 638      cpp_reader *pfile;
 639      cpp_string *number;
 640      int leading_period;
 641 {
 642   const uchar *cur;
 643
 644   /* Fast-path loop.  Skim over a normal number.
 645      N.B. ISIDNUM does not include $.  */
 646   cur = pfile->buffer->cur;
 647
 648   while (ISXDIGIT (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]) || VALID_HEX (*cur, cur[-1]))
 649     cur++;
 650
 651   /* Check for slow-path cases.  */
 652   if (*cur == '?' || *cur == '\\' || *cur == '$')
 653     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 654   else
 655     {
 656       const uchar *base = pfile->buffer->cur - 1;
 657       uchar *dest;
 658
 659       number->len = cur - base + leading_period;
 660       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 661       dest[number->len] = '\0';
 662       number->text = dest;
 663
 664       if (leading_period)
 665         *dest++ = '.';
 666       memcpy (dest, base, cur - base);
 667       pfile->buffer->cur = cur;
 668     }
 669 }
 670
 671 /* Subroutine of parse_string.  */
 672 static int
 673 unescaped_terminator_p (pfile, dest)
 674      cpp_reader *pfile;
 675      const unsigned char *dest;
 676 {
 677   const unsigned char *start, *temp;
 678
 679   /* In #include-style directives, terminators are not escapeable.  */
 680   if (pfile->state.angled_headers)
 681     return 1;
 682
 683   start = BUFF_FRONT (pfile->u_buff);
 684
 685   /* An odd number of consecutive backslashes represents an escaped
 686      terminator.  */
 687   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 688     ;
 689
 690   return ((dest - temp) & 1) == 0;
 691 }
 692
 693 /* Parses a string, character constant, or angle-bracketed header file
 694    name.  Handles embedded trigraphs and escaped newlines.  The stored
 695    string is guaranteed NUL-terminated, but it is not guaranteed that
 696    this is the first NUL since embedded NULs are preserved.
 697
 698    When this function returns, buffer->cur points to the next
 699    character to be processed.  */
 700 static void
 701 parse_string (pfile, token, terminator)
 702      cpp_reader *pfile;
 703      cpp_token *token;
 704      cppchar_t terminator;
 705 {
 706   cpp_buffer *buffer = pfile->buffer;
 707   unsigned char *dest, *limit;
 708   cppchar_t c;
 709   bool warned_nulls = false;
 710 #ifdef MULTIBYTE_CHARS
 711   wchar_t wc;
 712   int char_len;
 713 #endif
 714
 715   dest = BUFF_FRONT (pfile->u_buff);
 716   limit = BUFF_LIMIT (pfile->u_buff);
 717
 718 #ifdef MULTIBYTE_CHARS
 719   /* Reset multibyte conversion state.  */
 720   (void) local_mbtowc (NULL, NULL, 0);
 721 #endif
 722   for (;;)
 723     {
 724       /* We need room for another char, possibly the terminating NUL.  */
 725       if ((size_t) (limit - dest) < 1)
 726         {
 727           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 728           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 729           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 730           limit = BUFF_LIMIT (pfile->u_buff);
 731         }
 732
 733 #ifdef MULTIBYTE_CHARS
 734       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 735                                buffer->rlimit - buffer->cur);
 736       if (char_len == -1)
 737         {
 738           cpp_error (pfile, DL_WARNING,
 739                      "ignoring invalid multibyte character");
 740           char_len = 1;
 741           c = *buffer->cur++;
 742         }
 743       else
 744         {
 745           buffer->cur += char_len;
 746           c = wc;
 747         }
 748 #else
 749       c = *buffer->cur++;
 750 #endif
 751
 752       /* Handle trigraphs, escaped newlines etc.  */
 753       if (c == '?' || c == '\\')
 754         c = skip_escaped_newlines (pfile);
 755
 756       if (c == terminator)
 757         {
 758           if (unescaped_terminator_p (pfile, dest))
 759             break;
 760         }
 761       else if (is_vspace (c))
 762         {
 763           /* No string literal may extend over multiple lines.  In
 764              assembly language, suppress the error except for <>
 765              includes.  This is a kludge around not knowing where
 766              comments are.  */
 767         unterminated:
 768           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 769             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 770                        (int) terminator);
 771           buffer->cur--;
 772           break;
 773         }
 774       else if (c == '\0')
 775         {
 776           if (buffer->cur - 1 == buffer->rlimit)
 777             goto unterminated;
 778           if (!warned_nulls)
 779             {
 780               warned_nulls = true;
 781               cpp_error (pfile, DL_WARNING,
 782                          "null character(s) preserved in literal");
 783             }
 784         }
 785 #ifdef MULTIBYTE_CHARS
 786       if (char_len > 1)
 787         {
 788           for ( ; char_len > 0; --char_len)
 789             *dest++ = (*buffer->cur - char_len);
 790         }
 791       else
 792 #endif
 793         *dest++ = c;
 794     }
 795
 796   *dest = '\0';
 797
 798   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 799   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 800   BUFF_FRONT (pfile->u_buff) = dest + 1;
 801 }
 802
 803 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
 804    comment blocks (when executed with -C option) and
 805    _asm (SDCPP specific) blocks */
 806
 807 /* Count and copy characters from src to dest, excluding CRs:
 808    CRs are automatically generated, because the output is
 809    opened in TEXT mode. If dest == NULL, only count chars */
 810 static unsigned int
 811 copy_text_chars (dest, src, len)
 812      char *dest;
 813      const char *src;
 814      unsigned int len;
 815 {
 816   unsigned int n = 0;
 817   const char *p;
 818
 819   for (p = src; p != src + len; ++p)
 820     {
 821       assert(*p != '\0');
 822
 823       if (*p != '\r')
 824         {
 825           if (dest != NULL)
 826             *dest++ = *p;
 827           ++n;
 828         }
 829     }
 830
 831     return n;
 832 }
 833
 834 /* SDCC _asm specific */
 835 /* The stored comment includes the comment start and any terminator.  */
 836 static void
 837 save_asm (pfile, token, from)
 838      cpp_reader *pfile;
 839      cpp_token *token;
 840      const unsigned char *from;
 841 {
 842 #define _ASM_STR  "_asm"
 843 #define _ASM_LEN  ((sizeof _ASM_STR) - 1)
 844
 845   unsigned char *buffer;
 846   unsigned int text_len, len;
 847
 848   len = pfile->buffer->cur - from;
 849   /* + _ASM_LEN for the initial '_asm'.  */
 850   text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
 851   buffer = _cpp_unaligned_alloc (pfile, text_len);
 852
 853
 854   token->type = CPP_ASM;
 855   token->val.str.len = text_len;
 856   token->val.str.text = buffer;
 857
 858   memcpy (buffer, _ASM_STR, _ASM_LEN);
 859   copy_text_chars (buffer + _ASM_LEN, from, len);
 860 }
 861
 862 /* The stored comment includes the comment start and any terminator.  */
 863 static void
 864 save_comment (pfile, token, from, type)
 865      cpp_reader *pfile;
 866      cpp_token *token;
 867      const unsigned char *from;
 868      cppchar_t type;
 869 {
 870   unsigned char *buffer;
 871   unsigned int len, clen;
 872
 873   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 874
 875   /* C++ comments probably (not definitely) have moved past a new
 876      line, which we don't want to save in the comment.  */
 877   if (is_vspace (pfile->buffer->cur[-1]))
 878     len--;
 879
 880   /* If we are currently in a directive, then we need to store all
 881      C++ comments as C comments internally, and so we need to
 882      allocate a little extra space in that case.
 883
 884      Note that the only time we encounter a directive here is
 885      when we are saving comments in a "#define".  */
 886   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 887
 888   buffer = _cpp_unaligned_alloc (pfile, clen);
 889
 890   token->type = CPP_COMMENT;
 891   token->val.str.len = clen;
 892   token->val.str.text = buffer;
 893
 894   buffer[0] = '/';
 895   copy_text_chars (buffer + 1, from, len);
 896
 897   /* Finish conversion to a C comment, if necessary.  */
 898   if (pfile->state.in_directive && type == '/')
 899     {
 900       buffer[1] = '*';
 901       buffer[clen - 2] = '*';
 902       buffer[clen - 1] = '/';
 903     }
 904 }
 905
 906 /* Allocate COUNT tokens for RUN.  */
 907 void
 908 _cpp_init_tokenrun (run, count)
 909      tokenrun *run;
 910      unsigned int count;
 911 {
 912   run->base = xnewvec (cpp_token, count);
 913   run->limit = run->base + count;
 914   run->next = NULL;
 915 }
 916
 917 /* Returns the next tokenrun, or creates one if there is none.  */
 918 static tokenrun *
 919 next_tokenrun (run)
 920      tokenrun *run;
 921 {
 922   if (run->next == NULL)
 923     {
 924       run->next = xnew (tokenrun);
 925       run->next->prev = run;
 926       _cpp_init_tokenrun (run->next, 250);
 927     }
 928
 929   return run->next;
 930 }
 931
 932 /* Allocate a single token that is invalidated at the same time as the
 933    rest of the tokens on the line.  Has its line and col set to the
 934    same as the last lexed token, so that diagnostics appear in the
 935    right place.  */
 936 cpp_token *
 937 _cpp_temp_token (pfile)
 938      cpp_reader *pfile;
 939 {
 940   cpp_token *old, *result;
 941
 942   old = pfile->cur_token - 1;
 943   if (pfile->cur_token == pfile->cur_run->limit)
 944     {
 945       pfile->cur_run = next_tokenrun (pfile->cur_run);
 946       pfile->cur_token = pfile->cur_run->base;
 947     }
 948
 949   result = pfile->cur_token++;
 950   result->line = old->line;
 951   result->col = old->col;
 952   return result;
 953 }
 954
 955 /* Lex a token into RESULT (external interface).  Takes care of issues
 956    like directive handling, token lookahead, multiple include
 957    optimization and skipping.  */
 958 const cpp_token *
 959 _cpp_lex_token (pfile)
 960      cpp_reader *pfile;
 961 {
 962   cpp_token *result;
 963
 964   for (;;)
 965     {
 966       if (pfile->cur_token == pfile->cur_run->limit)
 967         {
 968           pfile->cur_run = next_tokenrun (pfile->cur_run);
 969           pfile->cur_token = pfile->cur_run->base;
 970         }
 971
 972       if (pfile->lookaheads)
 973         {
 974           pfile->lookaheads--;
 975           result = pfile->cur_token++;
 976         }
 977       else
 978         result = _cpp_lex_direct (pfile);
 979
 980       if (result->flags & BOL)
 981         {
 982           /* Is this a directive.  If _cpp_handle_directive returns
 983              false, it is an assembler #.  */
 984           if (result->type == CPP_HASH
 985               /* 6.10.3 p 11: Directives in a list of macro arguments
 986                  gives undefined behavior.  This implementation
 987                  handles the directive as normal.  */
 988               && pfile->state.parsing_args != 1
 989               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 990             continue;
 991           if (pfile->cb.line_change && !pfile->state.skipping)
 992             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 993         }
 994
 995       /* We don't skip tokens in directives.  */
 996       if (pfile->state.in_directive)
 997         break;
 998
 999       /* Outside a directive, invalidate controlling macros.  At file
1000          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001          get here and MI optimisation works.  */
1002       pfile->mi_valid = false;
1003
1004       if (!pfile->state.skipping || result->type == CPP_EOF)
1005         break;
1006     }
1007
1008   return result;
1009 }
1010
1011 /* A NUL terminates the current buffer.  For ISO preprocessing this is
1012    EOF, but for traditional preprocessing it indicates we need a line
1013    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
1014    to return a CPP_EOF to the caller.  */
1015 static bool
1016 continue_after_nul (pfile)
1017      cpp_reader *pfile;
1018 {
1019   cpp_buffer *buffer = pfile->buffer;
1020   bool more = false;
1021
1022   buffer->saved_flags = BOL;
1023   if (CPP_OPTION (pfile, traditional))
1024     {
1025       if (pfile->state.in_directive)
1026         return false;
1027
1028       _cpp_remove_overlay (pfile);
1029       more = _cpp_read_logical_line_trad (pfile);
1030       _cpp_overlay_buffer (pfile, pfile->out.base,
1031                            pfile->out.cur - pfile->out.base);
1032       pfile->line = pfile->out.first_line;
1033     }
1034   else
1035     {
1036       /* Stop parsing arguments with a CPP_EOF.  When we finally come
1037          back here, do the work of popping the buffer.  */
1038       if (!pfile->state.parsing_args)
1039         {
1040           if (buffer->cur != buffer->line_base)
1041             {
1042               /* Non-empty files should end in a newline.  Don't warn
1043                  for command line and _Pragma buffers.  */
1044               if (!buffer->from_stage3)
1045                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1046               handle_newline (pfile);
1047             }
1048
1049           /* Similarly, finish an in-progress directive with CPP_EOF
1050              before popping the buffer.  */
1051           if (!pfile->state.in_directive && buffer->prev)
1052             {
1053               more = !buffer->return_at_eof;
1054               _cpp_pop_buffer (pfile);
1055             }
1056         }
1057     }
1058
1059   return more;
1060 }
1061
/* Helper for the lexer's operator cases.  Reads the next effective
   character; if it equals CHAR the token type becomes THEN_TYPE,
   otherwise BACKUP () is invoked and the type becomes ELSE_TYPE.
   Relies on variables named `pfile' and `result' being in scope at
   the point of use.  The arguments are parenthesized so that an
   expression argument cannot bind to the surrounding operators.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) == (CHAR))   \
      result->type = (THEN_TYPE);               \
    else                                        \
      {                                         \
        BACKUP ();                              \
        result->type = (ELSE_TYPE);             \
      }                                         \
  } while (0)
1072
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimisation, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is one switch on the first character of the token.  The
   labels fresh_line, update_tokens_line and skipped_white are
   re-entry points used to restart lexing after a newline or buffer
   change, after an escaped newline or a discarded comment, and after
   a run of whitespace, respectively.  */
cpp_token *
_cpp_lex_direct (pfile)
     cpp_reader *pfile;
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the next token slot up front.  */
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  /* Re-read the buffer pointer (it may have changed since the last
     pass) and transfer the flags saved for this token, e.g. BOL.  */
  buffer = pfile->buffer;
  result->flags = buffer->saved_flags;
  buffer->saved_flags = 0;
 update_tokens_line:
  result->line = pfile->line;

 skipped_white:
  c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);

 trigraph:
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      if (skip_whitespace (pfile, c))
        goto skipped_white;

      /* End of buffer.  */
      buffer->cur--;
      if (continue_after_nul (pfile))
        goto fresh_line;
      result->type = CPP_EOF;
      break;

    case '\n': case '\r':
      handle_newline (pfile);
      buffer->saved_flags = BOL;
      if (! pfile->state.in_directive)
        {
          if (pfile->state.parsing_args == 2)
            buffer->saved_flags |= PREV_WHITE;
          /* Unless tokens have to be kept, start the new line in the
             first slot of the base token run again.  */
          if (!pfile->keep_tokens)
            {
              pfile->cur_run = &pfile->base_run;
              result = pfile->base_run.base;
              pfile->cur_token = result + 1;
            }
          goto fresh_line;
        }
      /* Inside a directive the newline terminates it.  */
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
         skip_escaped_newlines do all the work.  */
      {
        unsigned int line = pfile->line;

        c = skip_escaped_newlines (pfile);
        if (line != pfile->line)
          {
            buffer->cur--;
            /* We had at least one escaped newline of some sort.
               Update the token's line and column.  */
            goto update_tokens_line;
          }
      }

      /* We are either the original '?' or '\\', or a trigraph.  */
      if (c == '?')
        result->type = CPP_QUERY;
      else if (c == '\\')
        goto random_char;
      else
        goto trigraph;
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, 0);
      break;

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      {
        const unsigned char *pos = buffer->cur;

        c = get_effective_char (pfile);
        if (c == '\'' || c == '"')
          {
            result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
            parse_string (pfile, result, c);
            break;
          }
        /* Not a wide literal: restore the read position so that the
           'L' is lexed as the start of an identifier.  */
        buffer->cur = pos;
      }
      /* Fall through.  */

    start_ident:
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile);

      /* SDCC _asm specific */
      /* handle _asm ... _endasm ;  */
      if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
        {
          comment_start = buffer->cur;
          result->type = CPP_ASM;
          skip_asm_block (pfile);
          /* Save the _asm block as a token in its own right.  */
          save_asm (pfile, result, comment_start);
        }
      /* Convert named operators to their proper types.  */
      else if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = result->val.node->value.operator;
        }
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = get_effective_char (pfile);

      if (c == '*')
        {
          if (skip_block_comment (pfile))
            cpp_error (pfile, DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || CPP_IN_SYSTEM_HEADER (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          BACKUP ();
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches reach this point; C holds the
         character that followed the initial '/'.  A comment that is
         not being saved counts as whitespace before the next token.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* When angled headers are expected, '<' opens a header name
         that runs up to the closing '>'.  */
      if (pfile->state.angled_headers)
        {
          result->type = CPP_HEADER_NAME;
          parse_string (pfile, result, '>');
          break;
        }

      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_LESS_EQ;
      else if (c == '<')
        IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_SQUARE;
          result->flags |= DIGRAPH;
        }
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
        {
          result->type = CPP_OPEN_BRACE;
          result->flags |= DIGRAPH;
        }
      else
        {
          BACKUP ();
          result->type = CPP_LESS;
        }
      break;

    case '>':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_GREATER_EQ;
      else if (c == '>')
        IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
      else
        {
          BACKUP ();
          result->type = CPP_GREATER;
        }
      break;

    case '%':
      c = get_effective_char (pfile);
      if (c == '=')
        result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs) && c == ':')
        {
          /* "%:" is the digraph spelling of '#'; "%:%:" that of
             "##".  */
          result->flags |= DIGRAPH;
          result->type = CPP_HASH;
          if (get_effective_char (pfile) == '%')
            {
              const unsigned char *pos = buffer->cur;

              if (get_effective_char (pfile) == ':')
                result->type = CPP_PASTE;
              else
                /* Only "%:%" was seen: go back to just before the
                   position recorded after the second '%'.  */
                buffer->cur = pos - 1;
            }
          else
            BACKUP ();
        }
      else if (CPP_OPTION (pfile, digraphs) && c == '>')
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_BRACE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_MOD;
        }
      break;

    case '.':
      result->type = CPP_DOT;
      c = get_effective_char (pfile);
      if (c == '.')
        {
          const unsigned char *pos = buffer->cur;

          if (get_effective_char (pfile) == '.')
            result->type = CPP_ELLIPSIS;
          else
            /* Just two dots: go back to just before the position
               recorded after the second '.'.  */
            buffer->cur = pos - 1;
        }
      /* All known character sets have 0...9 contiguous.  */
      else if (ISDIGIT (c))
        {
          result->type = CPP_NUMBER;
          parse_number (pfile, &result->val.str, 1);
        }
      else if (c == '*' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_DOT_STAR;
      else
        BACKUP ();
      break;

    case '+':
      c = get_effective_char (pfile);
      if (c == '+')
        result->type = CPP_PLUS_PLUS;
      else if (c == '=')
        result->type = CPP_PLUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_PLUS;
        }
      break;

    case '-':
      c = get_effective_char (pfile);
      if (c == '>')
        {
          result->type = CPP_DEREF;
          if (CPP_OPTION (pfile, cplusplus))
            {
              if (get_effective_char (pfile) == '*')
                result->type = CPP_DEREF_STAR;
              else
                BACKUP ();
            }
        }
      else if (c == '-')
        result->type = CPP_MINUS_MINUS;
      else if (c == '=')
        result->type = CPP_MINUS_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_MINUS;
        }
      break;

    case '&':
      c = get_effective_char (pfile);
      if (c == '&')
        result->type = CPP_AND_AND;
      else if (c == '=')
        result->type = CPP_AND_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_AND;
        }
      break;

    case '|':
      c = get_effective_char (pfile);
      if (c == '|')
        result->type = CPP_OR_OR;
      else if (c == '=')
        result->type = CPP_OR_EQ;
      else
        {
          BACKUP ();
          result->type = CPP_OR;
        }
      break;

    case ':':
      c = get_effective_char (pfile);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
        result->type = CPP_SCOPE;
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
        {
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      else
        {
          BACKUP ();
          result->type = CPP_COLON;
        }
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
      if (CPP_OPTION (pfile, dollars_in_ident))
        goto start_ident;
      /* Fall through...  */

    random_char:
    default:
      /* Anything else becomes a CPP_OTHER token carrying the
         character itself.  */
      result->type = CPP_OTHER;
      result->val.c = c;
      break;
    }

  return result;
}
1489
1490 /* An upper bound on the number of bytes needed to spell TOKEN,
1491    including preceding whitespace.  */
1492 unsigned int
1493 cpp_token_len (token)
1494      const cpp_token *token;
1495 {
1496   unsigned int len;
1497
1498   switch (TOKEN_SPELL (token))
1499     {
1500     default:            len = 0;                                break;
1501     case SPELL_NUMBER:
1502     case SPELL_STRING:  len = token->val.str.len;               break;
1503     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1504     }
1505   /* 1 for whitespace, 4 for comment delimiters.  */
1506   return len + 5;
1507 }
1508
1509 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1510    already contain the enough space to hold the token's spelling.
1511    Returns a pointer to the character after the last character
1512    written.  */
1513 unsigned char *
1514 cpp_spell_token (pfile, token, buffer)
1515      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1516      const cpp_token *token;
1517      unsigned char *buffer;
1518 {
1519   switch (TOKEN_SPELL (token))
1520     {
1521     case SPELL_OPERATOR:
1522       {
1523         const unsigned char *spelling;
1524         unsigned char c;
1525
1526         if (token->flags & DIGRAPH)
1527           spelling
1528             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1529         else if (token->flags & NAMED_OP)
1530           goto spell_ident;
1531         else
1532           spelling = TOKEN_NAME (token);
1533
1534         while ((c = *spelling++) != '\0')
1535           *buffer++ = c;
1536       }
1537       break;
1538
1539     case SPELL_CHAR:
1540       *buffer++ = token->val.c;
1541       break;
1542
1543     spell_ident:
1544     case SPELL_IDENT:
1545       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1546       buffer += NODE_LEN (token->val.node);
1547       break;
1548
1549     case SPELL_NUMBER:
1550       memcpy (buffer, token->val.str.text, token->val.str.len);
1551       buffer += token->val.str.len;
1552       break;
1553
1554     case SPELL_STRING:
1555       {
1556         int left, right, tag;
1557         switch (token->type)
1558           {
1559           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1560           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1561           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1562           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1563           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1564           default:
1565             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1566                        TOKEN_NAME (token));
1567             return buffer;
1568           }
1569         if (tag) *buffer++ = tag;
1570         *buffer++ = left;
1571         memcpy (buffer, token->val.str.text, token->val.str.len);
1572         buffer += token->val.str.len;
1573         *buffer++ = right;
1574       }
1575       break;
1576
1577     case SPELL_NONE:
1578       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1579       break;
1580     }
1581
1582   return buffer;
1583 }
1584
1585 /* Returns TOKEN spelt as a null-terminated string.  The string is
1586    freed when the reader is destroyed.  Useful for diagnostics.  */
1587 unsigned char *
1588 cpp_token_as_text (pfile, token)
1589      cpp_reader *pfile;
1590      const cpp_token *token;
1591 {
1592   unsigned int len = cpp_token_len (token);
1593   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1594
1595   end = cpp_spell_token (pfile, token, start);
1596   end[0] = '\0';
1597
1598   return start;
1599 }
1600
1601 /* Used by C front ends, which really should move to using
1602    cpp_token_as_text.  */
1603 const char *
1604 cpp_type2name (type)
1605      enum cpp_ttype type;
1606 {
1607   return (const char *) token_spellings[type].name;
1608 }
1609
1610 /* Writes the spelling of token to FP, without any preceding space.
1611    Separated from cpp_spell_token for efficiency - to avoid stdio
1612    double-buffering.  */
1613 void
1614 cpp_output_token (token, fp)
1615      const cpp_token *token;
1616      FILE *fp;
1617 {
1618   switch (TOKEN_SPELL (token))
1619     {
1620     case SPELL_OPERATOR:
1621       {
1622         const unsigned char *spelling;
1623         int c;
1624
1625         if (token->flags & DIGRAPH)
1626           spelling
1627             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1628         else if (token->flags & NAMED_OP)
1629           goto spell_ident;
1630         else
1631           spelling = TOKEN_NAME (token);
1632
1633         c = *spelling;
1634         do
1635           putc (c, fp);
1636         while ((c = *++spelling) != '\0');
1637       }
1638       break;
1639
1640     case SPELL_CHAR:
1641       putc (token->val.c, fp);
1642       break;
1643
1644     spell_ident:
1645     case SPELL_IDENT:
1646       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1647     break;
1648
1649     case SPELL_NUMBER:
1650       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1651       break;
1652
1653     case SPELL_STRING:
1654       {
1655         int left, right, tag;
1656         switch (token->type)
1657           {
1658           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1659           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1660           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1661           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1662           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1663           case CPP_ASM:         left = '\0'; right = '\0'; tag = '\0'; break;
1664           default:
1665             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1666             return;
1667           }
1668         if (tag) putc (tag, fp);
1669         if (left) putc (left, fp);
1670         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1671         if (right) putc (right, fp);
1672       }
1673       break;
1674
1675     case SPELL_NONE:
1676       /* An error, most probably.  */
1677       break;
1678     }
1679 }
1680
1681 /* Compare two tokens.  */
1682 int
1683 _cpp_equiv_tokens (a, b)
1684      const cpp_token *a, *b;
1685 {
1686   if (a->type == b->type && a->flags == b->flags)
1687     switch (TOKEN_SPELL (a))
1688       {
1689       default:                  /* Keep compiler happy.  */
1690       case SPELL_OPERATOR:
1691         return 1;
1692       case SPELL_CHAR:
1693         return a->val.c == b->val.c; /* Character.  */
1694       case SPELL_NONE:
1695         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1696       case SPELL_IDENT:
1697         return a->val.node == b->val.node;
1698       case SPELL_NUMBER:
1699       case SPELL_STRING:
1700         return (a->val.str.len == b->val.str.len
1701                 && !memcmp (a->val.str.text, b->val.str.text,
1702                             a->val.str.len));
1703       }
1704
1705   return 0;
1706 }
1707
1708 /* Returns nonzero if a space should be inserted to avoid an
1709    accidental token paste for output.  For simplicity, it is
1710    conservative, and occasionally advises a space where one is not
1711    needed, e.g. "." and ".2".  */
1712 int
1713 cpp_avoid_paste (pfile, token1, token2)
1714      cpp_reader *pfile;
1715      const cpp_token *token1, *token2;
1716 {
1717   enum cpp_ttype a = token1->type, b = token2->type;
1718   cppchar_t c;
1719
1720   if (token1->flags & NAMED_OP)
1721     a = CPP_NAME;
1722   if (token2->flags & NAMED_OP)
1723     b = CPP_NAME;
1724
1725   c = EOF;
1726   if (token2->flags & DIGRAPH)
1727     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1728   else if (token_spellings[b].category == SPELL_OPERATOR)
1729     c = token_spellings[b].name[0];
1730
1731   /* Quickly get everything that can paste with an '='.  */
1732   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1733     return 1;
1734
1735   switch (a)
1736     {
1737     case CPP_GREATER:   return c == '>' || c == '?';
1738     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1739     case CPP_PLUS:      return c == '+';
1740     case CPP_MINUS:     return c == '-' || c == '>';
1741     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1742     case CPP_MOD:       return c == ':' || c == '>';
1743     case CPP_AND:       return c == '&';
1744     case CPP_OR:        return c == '|';
1745     case CPP_COLON:     return c == ':' || c == '>';
1746     case CPP_DEREF:     return c == '*';
1747     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1748     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1749     case CPP_NAME:      return ((b == CPP_NUMBER
1750                                  && name_p (pfile, &token2->val.str))
1751                                 || b == CPP_NAME
1752                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1753     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1754                                 || c == '.' || c == '+' || c == '-');
1755     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1756                                 && token1->val.c == '@'
1757                                 && (b == CPP_NAME || b == CPP_STRING));
1758     default:            break;
1759     }
1760
1761   return 0;
1762 }
1763
1764 /* Output all the remaining tokens on the current line, and a newline
1765    character, to FP.  Leading whitespace is removed.  If there are
1766    macros, special token padding is not performed.  */
1767 void
1768 cpp_output_line (pfile, fp)
1769      cpp_reader *pfile;
1770      FILE *fp;
1771 {
1772   const cpp_token *token;
1773
1774   token = cpp_get_token (pfile);
1775   while (token->type != CPP_EOF)
1776     {
1777       cpp_output_token (token, fp);
1778       token = cpp_get_token (pfile);
1779       if (token->flags & PREV_WHITE)
1780         putc (' ', fp);
1781     }
1782
1783   putc ('\n', fp);
1784 }
1785
/* Return the numeric value of the hexadecimal digit C.  Aborts if
   hex_p does not accept C as a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1796
1797 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1798    failure if cpplib is not parsing C++ or C99.  Such failure is
1799    silent, and no variables are updated.  Otherwise returns 0, and
1800    warns if -Wtraditional.
1801
1802    [lex.charset]: The character designated by the universal character
1803    name \UNNNNNNNN is that character whose character short name in
1804    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1805    universal character name \uNNNN is that character whose character
1806    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1807    for a universal character name is less than 0x20 or in the range
1808    0x7F-0x9F (inclusive), or if the universal character name
1809    designates a character in the basic source character set, then the
1810    program is ill-formed.
1811
1812    We assume that wchar_t is Unicode, so we don't need to do any
1813    mapping.  Is this ever wrong?
1814
1815    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1816    LIMIT is the end of the string or charconst.  PSTR is updated to
1817    point after the UCS on return, and the UCS is written into PC.  */
1818
1819 static int
1820 maybe_read_ucs (pfile, pstr, limit, pc)
1821      cpp_reader *pfile;
1822      const unsigned char **pstr;
1823      const unsigned char *limit;
1824      cppchar_t *pc;
1825 {
1826   const unsigned char *p = *pstr;
1827   unsigned int code = 0;
1828   unsigned int c = *pc, length;
1829
1830   /* Only attempt to interpret a UCS for C++ and C99.  */
1831   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1832     return 1;
1833
1834   if (CPP_WTRADITIONAL (pfile))
1835     cpp_error (pfile, DL_WARNING,
1836                "the meaning of '\\%c' is different in traditional C", c);
1837
1838   length = (c == 'u' ? 4: 8);
1839
1840   if ((size_t) (limit - p) < length)
1841     {
1842       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1843       /* Skip to the end to avoid more diagnostics.  */
1844       p = limit;
1845     }
1846   else
1847     {
1848       for (; length; length--, p++)
1849         {
1850           c = *p;
1851           if (ISXDIGIT (c))
1852             code = (code << 4) + hex_digit_value (c);
1853           else
1854             {
1855               cpp_error (pfile, DL_ERROR,
1856                          "non-hex digit '%c' in universal-character-name", c);
1857               /* We shouldn't skip in case there are multibyte chars.  */
1858               break;
1859             }
1860         }
1861     }
1862
1863 #ifdef TARGET_EBCDIC
1864   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1865   code = 0x3f;  /* EBCDIC invalid character */
1866 #else
1867  /* True extended characters are OK.  */
1868   if (code >= 0xa0
1869       && !(code & 0x80000000)
1870       && !(code >= 0xD800 && code <= 0xDFFF))
1871     ;
1872   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1873      hex escapes so that this also works with EBCDIC hosts.  */
1874   else if (code == 0x24 || code == 0x40 || code == 0x60)
1875     ;
1876   /* Don't give another error if one occurred above.  */
1877   else if (length == 0)
1878     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1879 #endif
1880
1881   *pstr = p;
1882   *pc = code;
1883   return 0;
1884 }
1885
1886 /* Returns the value of an escape sequence, truncated to the correct
1887    target precision.  PSTR points to the input pointer, which is just
1888    after the backslash.  LIMIT is how much text we have.  WIDE is true
1889    if the escape sequence is part of a wide character constant or
1890    string literal.  Handles all relevant diagnostics.  */
1891 cppchar_t
1892 cpp_parse_escape (pfile, pstr, limit, wide)
1893      cpp_reader *pfile;
1894      const unsigned char **pstr;
1895      const unsigned char *limit;
1896      int wide;
1897 {
1898   int unknown = 0;
1899   const unsigned char *str = *pstr;
1900   cppchar_t c, mask;
1901   unsigned int width;
1902
1903   if (wide)
1904     width = CPP_OPTION (pfile, wchar_precision);
1905   else
1906     width = CPP_OPTION (pfile, char_precision);
1907   if (width < BITS_PER_CPPCHAR_T)
1908     mask = ((cppchar_t) 1 << width) - 1;
1909   else
1910     mask = ~0;
1911
1912   c = *str++;
1913   switch (c)
1914     {
1915     case '\\': case '\'': case '"': case '?': break;
1916     case 'b': c = TARGET_BS;      break;
1917     case 'f': c = TARGET_FF;      break;
1918     case 'n': c = TARGET_NEWLINE; break;
1919     case 'r': c = TARGET_CR;      break;
1920     case 't': c = TARGET_TAB;     break;
1921     case 'v': c = TARGET_VT;      break;
1922
1923     case '(': case '{': case '[': case '%':
1924       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1925          '\%' is used to prevent SCCS from getting confused.  */
1926       unknown = CPP_PEDANTIC (pfile);
1927       break;
1928
1929     case 'a':
1930       if (CPP_WTRADITIONAL (pfile))
1931         cpp_error (pfile, DL_WARNING,
1932                    "the meaning of '\\a' is different in traditional C");
1933       c = TARGET_BELL;
1934       break;
1935
1936     case 'e': case 'E':
1937       if (CPP_PEDANTIC (pfile))
1938         cpp_error (pfile, DL_PEDWARN,
1939                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1940       c = TARGET_ESC;
1941       break;
1942
1943     case 'u': case 'U':
1944       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1945       break;
1946
1947     case 'x':
1948       if (CPP_WTRADITIONAL (pfile))
1949         cpp_error (pfile, DL_WARNING,
1950                    "the meaning of '\\x' is different in traditional C");
1951
1952       {
1953         cppchar_t i = 0, overflow = 0;
1954         int digits_found = 0;
1955
1956         while (str < limit)
1957           {
1958             c = *str;
1959             if (! ISXDIGIT (c))
1960               break;
1961             str++;
1962             overflow |= i ^ (i << 4 >> 4);
1963             i = (i << 4) + hex_digit_value (c);
1964             digits_found = 1;
1965           }
1966
1967         if (!digits_found)
1968           cpp_error (pfile, DL_ERROR,
1969                        "\\x used with no following hex digits");
1970
1971         if (overflow | (i != (i & mask)))
1972           {
1973             cpp_error (pfile, DL_PEDWARN,
1974                        "hex escape sequence out of range");
1975             i &= mask;
1976           }
1977         c = i;
1978       }
1979       break;
1980
1981     case '0':  case '1':  case '2':  case '3':
1982     case '4':  case '5':  case '6':  case '7':
1983       {
1984         size_t count = 0;
1985         cppchar_t i = c - '0';
1986
1987         while (str < limit && ++count < 3)
1988           {
1989             c = *str;
1990             if (c < '0' || c > '7')
1991               break;
1992             str++;
1993             i = (i << 3) + c - '0';
1994           }
1995
1996         if (i != (i & mask))
1997           {
1998             cpp_error (pfile, DL_PEDWARN,
1999                        "octal escape sequence out of range");
2000             i &= mask;
2001           }
2002         c = i;
2003       }
2004       break;
2005
2006     default:
2007       unknown = 1;
2008       break;
2009     }
2010
2011   if (unknown)
2012     {
2013       if (ISGRAPH (c))
2014         cpp_error (pfile, DL_PEDWARN,
2015                    "unknown escape sequence '\\%c'", (int) c);
2016       else
2017         cpp_error (pfile, DL_PEDWARN,
2018                    "unknown escape sequence: '\\%03o'", (int) c);
2019     }
2020
2021   if (c > mask)
2022     {
2023       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2024       c &= mask;
2025     }
2026
2027   *pstr = str;
2028   return c;
2029 }
2030
2031 /* Interpret a (possibly wide) character constant in TOKEN.
2032    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
2033    points to a variable that is filled in with the number of
2034    characters seen, and UNSIGNEDP to a variable that indicates whether
2035    the result has signed type.  */
2036 cppchar_t
2037 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2038      cpp_reader *pfile;
2039      const cpp_token *token;
2040      unsigned int *pchars_seen;
2041      int *unsignedp;
2042 {
2043   const unsigned char *str = token->val.str.text;
2044   const unsigned char *limit = str + token->val.str.len;
2045   unsigned int chars_seen = 0;
2046   size_t width, max_chars;
2047   cppchar_t c, mask, result = 0;
2048   bool unsigned_p;
2049
2050 #ifdef MULTIBYTE_CHARS
2051   (void) local_mbtowc (NULL, NULL, 0);
2052 #endif
2053
2054   /* Width in bits.  */
2055   if (token->type == CPP_CHAR)
2056     {
2057       width = CPP_OPTION (pfile, char_precision);
2058       max_chars = CPP_OPTION (pfile, int_precision) / width;
2059       unsigned_p = CPP_OPTION (pfile, unsigned_char);
2060     }
2061   else
2062     {
2063       width = CPP_OPTION (pfile, wchar_precision);
2064       max_chars = 1;
2065       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
2066     }
2067
2068   if (width < BITS_PER_CPPCHAR_T)
2069     mask = ((cppchar_t) 1 << width) - 1;
2070   else
2071     mask = ~0;
2072
2073   while (str < limit)
2074     {
2075 #ifdef MULTIBYTE_CHARS
2076       wchar_t wc;
2077       int char_len;
2078
2079       char_len = local_mbtowc (&wc, str, limit - str);
2080       if (char_len == -1)
2081         {
2082           cpp_error (pfile, DL_WARNING,
2083                      "ignoring invalid multibyte character");
2084           c = *str++;
2085         }
2086       else
2087         {
2088           str += char_len;
2089           c = wc;
2090         }
2091 #else
2092       c = *str++;
2093 #endif
2094
2095       if (c == '\\')
2096         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2097
2098 #ifdef MAP_CHARACTER
2099       if (ISPRINT (c))
2100         c = MAP_CHARACTER (c);
2101 #endif
2102
2103       chars_seen++;
2104
2105       /* Truncate the character, scale the result and merge the two.  */
2106       c &= mask;
2107       if (width < BITS_PER_CPPCHAR_T)
2108         result = (result << width) | c;
2109       else
2110         result = c;
2111     }
2112
2113   if (chars_seen == 0)
2114     cpp_error (pfile, DL_ERROR, "empty character constant");
2115   else if (chars_seen > 1)
2116     {
2117       /* Multichar charconsts are of type int and therefore signed.  */
2118       unsigned_p = 0;
2119
2120       if (chars_seen > max_chars)
2121         {
2122           chars_seen = max_chars;
2123           cpp_error (pfile, DL_WARNING,
2124                      "character constant too long for its type");
2125         }
2126       else if (CPP_OPTION (pfile, warn_multichar))
2127         cpp_error (pfile, DL_WARNING, "multi-character character constant");
2128     }
2129
2130   /* Sign-extend or truncate the constant to cppchar_t.  The value is
2131      in WIDTH bits, but for multi-char charconsts it's value is the
2132      full target type's width.  */
2133   if (chars_seen > 1)
2134     width *= max_chars;
2135   if (width < BITS_PER_CPPCHAR_T)
2136     {
2137       mask = ((cppchar_t) 1 << width) - 1;
2138       if (unsigned_p || !(result & (1 << (width - 1))))
2139         result &= mask;
2140       else
2141         result |= ~mask;
2142     }
2143
2144   *pchars_seen = chars_seen;
2145   *unsignedp = unsigned_p;
2146   return result;
2147 }
2148
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an argument containing a
   low-precedence operator still expands as a single operand.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2163
2164 /* Create a new allocation buffer.  Place the control block at the end
2165    of the buffer, so that buffer overflows will cause immediate chaos.  */
2166 static _cpp_buff *
2167 new_buff (len)
2168      size_t len;
2169 {
2170   _cpp_buff *result;
2171   unsigned char *base;
2172
2173   if (len < MIN_BUFF_SIZE)
2174     len = MIN_BUFF_SIZE;
2175   len = CPP_ALIGN (len);
2176
2177   base = xmalloc (len + sizeof (_cpp_buff));
2178   result = (_cpp_buff *) (base + len);
2179   result->base = base;
2180   result->cur = base;
2181   result->limit = base + len;
2182   result->next = NULL;
2183   return result;
2184 }
2185
2186 /* Place a chain of unwanted allocation buffers on the free list.  */
2187 void
2188 _cpp_release_buff (pfile, buff)
2189      cpp_reader *pfile;
2190      _cpp_buff *buff;
2191 {
2192   _cpp_buff *end = buff;
2193
2194   while (end->next)
2195     end = end->next;
2196   end->next = pfile->free_buffs;
2197   pfile->free_buffs = buff;
2198 }
2199
2200 /* Return a free buffer of size at least MIN_SIZE.  */
2201 _cpp_buff *
2202 _cpp_get_buff (pfile, min_size)
2203      cpp_reader *pfile;
2204      size_t min_size;
2205 {
2206   _cpp_buff *result, **p;
2207
2208   for (p = &pfile->free_buffs;; p = &(*p)->next)
2209     {
2210       size_t size;
2211
2212       if (*p == NULL)
2213         return new_buff (min_size);
2214       result = *p;
2215       size = result->limit - result->base;
2216       /* Return a buffer that's big enough, but don't waste one that's
2217          way too big.  */
2218       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2219         break;
2220     }
2221
2222   *p = result->next;
2223   result->next = NULL;
2224   result->cur = result->base;
2225   return result;
2226 }
2227
2228 /* Creates a new buffer with enough space to hold the uncommitted
2229    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2230    the excess bytes to the new buffer.  Chains the new buffer after
2231    BUFF, and returns the new buffer.  */
2232 _cpp_buff *
2233 _cpp_append_extend_buff (pfile, buff, min_extra)
2234      cpp_reader *pfile;
2235      _cpp_buff *buff;
2236      size_t min_extra;
2237 {
2238   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2239   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2240
2241   buff->next = new_buff;
2242   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2243   return new_buff;
2244 }
2245
2246 /* Creates a new buffer with enough space to hold the uncommitted
2247    remaining bytes of the buffer pointed to by BUFF, and at least
2248    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2249    Chains the new buffer before the buffer pointed to by BUFF, and
2250    updates the pointer to point to the new buffer.  */
2251 void
2252 _cpp_extend_buff (pfile, pbuff, min_extra)
2253      cpp_reader *pfile;
2254      _cpp_buff **pbuff;
2255      size_t min_extra;
2256 {
2257   _cpp_buff *new_buff, *old_buff = *pbuff;
2258   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2259
2260   new_buff = _cpp_get_buff (pfile, size);
2261   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2262   new_buff->next = old_buff;
2263   *pbuff = new_buff;
2264 }
2265
2266 /* Free a chain of buffers starting at BUFF.  */
2267 void
2268 _cpp_free_buff (buff)
2269      _cpp_buff *buff;
2270 {
2271   _cpp_buff *next;
2272
2273   for (; buff; buff = next)
2274     {
2275       next = buff->next;
2276       free (buff->base);
2277     }
2278 }
2279
2280 /* Allocate permanent, unaligned storage of length LEN.  */
2281 unsigned char *
2282 _cpp_unaligned_alloc (pfile, len)
2283      cpp_reader *pfile;
2284      size_t len;
2285 {
2286   _cpp_buff *buff = pfile->u_buff;
2287   unsigned char *result = buff->cur;
2288
2289   if (len > (size_t) (buff->limit - result))
2290     {
2291       buff = _cpp_get_buff (pfile, len);
2292       buff->next = pfile->u_buff;
2293       pfile->u_buff = buff;
2294       result = buff->cur;
2295     }
2296
2297   buff->cur = result + len;
2298   return result;
2299 }
2300
2301 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2302    That buffer is used for growing allocations when saving macro
2303    replacement lists in a #define, and when parsing an answer to an
2304    assertion in #assert, #unassert or #if (and therefore possibly
2305    whilst expanding macros).  It therefore must not be used by any
2306    code that they might call: specifically the lexer and the guts of
2307    the macro expander.
2308
2309    All existing other uses clearly fit this restriction: storing
2310    registered pragmas during initialization.  */
2311 unsigned char *
2312 _cpp_aligned_alloc (pfile, len)
2313      cpp_reader *pfile;
2314      size_t len;
2315 {
2316   _cpp_buff *buff = pfile->a_buff;
2317   unsigned char *result = buff->cur;
2318
2319   if (len > (size_t) (buff->limit - result))
2320     {
2321       buff = _cpp_get_buff (pfile, len);
2322       buff->next = pfile->a_buff;
2323       pfile->a_buff = buff;
2324       result = buff->cur;
2325     }
2326
2327   buff->cur = result + len;
2328   return result;
2329 }