git.gag.com Git - fw/sdcc/blob - support/cpp/libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Per Bothner, 1994-95.
   5    Based on CCCP program by Paul Rubin, June 1986
   6    Adapted to ANSI C, Richard Stallman, Jan 1987
   7    Broken out to separate file, Zack Weinberg, Mar 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 3, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "internal.h"
  27 #include <assert.h>
  28
/* Category stored for each token type in token_spellings below.  The
   OP entries of TTYPE_TABLE always get SPELL_OPERATOR; the TK entries
   name their own category through their second argument.
   NOTE(review): the precise meaning of IDENT / LITERAL / NONE is
   defined by the code that consumes TOKEN_SPELL, which is not visible
   here -- confirm before relying on the names alone.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One row of the spelling table: the category of a token type plus a
   string for it (the operator text for OP rows, the stringified
   enumerator name for TK rows -- see the OP and TK macros below).  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};

/* Spellings of the six digraphs.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };

/* Expand TTYPE_TABLE into one initializer per token type.  OP and TK
   are only meaningful while the table is being built, so they are
   undefined again immediately afterwards.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category / name string for TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
/* Forward declarations of helpers private to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  92                                   buffer->notes_cap);
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
/* Prepare the next logical line of PFILE's current buffer.  On return
   the line starting at buffer->cur contains no escaped newlines and,
   when trigraph conversion is enabled, no trigraphs; it ends in a
   single '\n', and buffer->next_line points just past the physical
   text that was consumed.  Every escaped newline and every trigraph
   encountered is recorded with add_line_note, followed by one
   sentinel note.  This is a time-critical inner loop.

   The work is done in two phases: a read-only scan that covers the
   common case of a line needing no rewriting, and a copying loop that
   is entered only once the line has to be modified in place.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  /* S is the read cursor.  D is the write cursor (and finally the
     place where the terminating '\n' is stored).  P is a scratch
     pointer used to scan backwards from a newline.  */
  const uchar *s;
  uchar c, *d, *p;

  /* Start a fresh line: discard the previous line's notes and make
     the line start where the previous one ended.  */
  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* Both loops below pre-increment S, so start one byte early.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Most recent backslash seen on this line, if any.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
         no trigraphs.  The primary win here is by not writing any
         data back to memory until we have to.  */
      for (;;)
        {
          c = *++s;
          if (__builtin_expect (c == '\n', false)
              || __builtin_expect (c == '\r', false))
            {
              /* The line terminator will be (re)written here at
                 "done"; the cast drops const for that store.  */
              d = (uchar *) s;

              /* Terminator sitting exactly at rlimit: nothing further
                 to inspect.  */
              if (__builtin_expect (s == buffer->rlimit, false))
                goto done;

              /* DOS line ending? */
              if (__builtin_expect (c == '\r', false)
                  && s[1] == '\n')
                {
                  s++;
                  if (s == buffer->rlimit)
                    goto done;
                }

              /* No backslash on the line means it cannot end in an
                 escaped newline.  */
              if (__builtin_expect (pbackslash == NULL, true))
                goto done;

              /* Check for escaped newline.  Step back over trailing
                 non-vertical whitespace; the newline is escaped only
                 if the character before that run is the last
                 backslash seen.  The scan cannot run off the line
                 because a backslash is known to precede it.  */
              p = d;
              while (is_nvspace (p[-1]))
                p--;
              if (p - 1 != pbackslash)
                goto done;

              /* Have an escaped newline; process it and proceed to
                 the slow path.  The note is placed at the backslash;
                 its type is ' ' when whitespace separated backslash
                 and newline, '\\' otherwise.  D is left one byte
                 before the backslash so the slow path's first store
                 overwrites the backslash.  next_line temporarily
                 marks where the spliced-in text begins; the slow
                 path uses it as the lower bound of its backward
                 scans.  */
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
              d = p - 2;
              buffer->next_line = p - 1;
              break;
            }
          if (__builtin_expect (c == '\\', false))
            pbackslash = s;
          else if (__builtin_expect (c == '?', false)
                   && __builtin_expect (s[1] == '?', false)
                   && _cpp_trigraph_map[s[2]])
            {
              /* Have a trigraph.  We may or may not have to convert
                 it.  Add a line note regardless, for -Wtrigraphs.
                 The note's type is the third trigraph character.  */
              add_line_note (buffer, s, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  /* We do, and that means we have to switch to the
                     slow path.  Store the replacement over the first
                     '?' and skip the remaining two bytes.  */
                  d = (uchar *) s;
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                  break;
                }
            }
        }


      /* Slow path: copy every byte from S to D, so the text following
         a removed backslash-newline or a converted trigraph is moved
         down over the gap.  */
      for (;;)
        {
          c = *++s;
          *++d = c;

          if (c == '\n' || c == '\r')
            {
              /* Handle DOS line endings.  */
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
                s++;
              if (s == buffer->rlimit)
                break;

              /* Escaped?  Scan back over trailing whitespace, but
                 never past next_line (the start of the text copied
                 since the last splice).  */
              p = d;
              while (p != buffer->next_line && is_nvspace (p[-1]))
                p--;
              if (p == buffer->next_line || p[-1] != '\\')
                break;

              /* Another escaped newline: note it and rewind D exactly
                 as in the fast path.  */
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
              d = p - 2;
              buffer->next_line = p - 1;
            }
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
            {
              /* Add a note regardless, for the benefit of -Wtrigraphs.
                 Here the note is attached to the write position D,
                 not the read position S.  */
              add_line_note (buffer, d, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                }
            }
        }
    }
  else
    {
      /* from_stage3 buffers get no trigraph or escaped-newline
         processing at all; only the end of the line is located.
         NOTE(review): presumably such text has already been through
         those translation phases -- confirm where from_stage3 is
         set.  */
      do
        s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
        s++;
    }

 done:
  /* Normalize the terminator: whatever ended the line ('\n', '\r' or
     the first byte of "\r\n") becomes a plain '\n' at D.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  /* The next line starts after the last physical byte consumed.  */
  buffer->next_line = s + 1;
}
 235
 236 /* Return true if the trigraph indicated by NOTE should be warned
 237    about in a comment.  */
 238 static bool
 239 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 240 {
 241   const uchar *p;
 242
 243   /* Within comments we don't warn about trigraphs, unless the
 244      trigraph forms an escaped newline, as that may change
 245      behavior.  */
 246   if (note->type != '/')
 247     return false;
 248
 249   /* If -trigraphs, then this was an escaped newline iff the next note
 250      is coincident.  */
 251   if (CPP_OPTION (pfile, trigraphs))
 252     return note[1].pos == note->pos;
 253
 254   /* Otherwise, see if this forms an escaped newline.  */
 255   p = note->pos + 3;
 256   while (is_nvspace (*p))
 257     p++;
 258
 259   /* There might have been escaped newlines between the trigraph and the
 260      newline we found.  Hence the position test.  */
 261   return (*p == '\n' && p < note[1].pos);
 262 }
 263
 264 /* Process the notes created by add_line_note as far as the current
 265    location.  */
 266 void
 267 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 268 {
 269   cpp_buffer *buffer = pfile->buffer;
 270
 271   for (;;)
 272     {
 273       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 274       unsigned int col;
 275
 276       if (note->pos > buffer->cur)
 277         break;
 278
 279       buffer->cur_note++;
 280       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 281
 282       if (note->type == '\\' || note->type == ' ')
 283         {
 284           if (note->type == ' ' && !in_comment)
 285             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 286                                  "backslash and newline separated by space");
 287
 288           if (buffer->next_line > buffer->rlimit)
 289             {
 290               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 291                                    "backslash-newline at end of file");
 292               /* Prevent "no newline at end of file" warning.  */
 293               buffer->next_line = buffer->rlimit;
 294             }
 295
 296           buffer->line_base = note->pos;
 297           CPP_INCREMENT_LINE (pfile, 0);
 298         }
 299       else if (_cpp_trigraph_map[note->type])
 300         {
 301           if (CPP_OPTION (pfile, warn_trigraphs)
 302               && (!in_comment || warn_in_comment (pfile, note)))
 303             {
 304               if (CPP_OPTION (pfile, trigraphs))
 305                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 306                                      "trigraph ??%c converted to %c",
 307                                      note->type,
 308                                      (int) _cpp_trigraph_map[note->type]);
 309               else
 310                 {
 311                   cpp_error_with_line
 312                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 313                      "trigraph ??%c ignored, use -trigraphs to enable",
 314                      note->type);
 315                 }
 316             }
 317         }
 318       else
 319         abort ();
 320     }
 321 }
 322
 323 /* SDCC _asm specific */
 324 /* Skip an _asm ... _endasm block.  We find the end of the comment by
 325    seeing _endasm.  Returns non-zero if _asm terminated by EOF, zero
 326    otherwise.  */
 327 static int
 328 skip_asm_block (cpp_reader *pfile)
 329 {
 330 #define _ENDASM_STR "endasm"
 331 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
 332
 333   cpp_buffer *buffer = pfile->buffer;
 334   cppchar_t c = EOF;
 335   int prev_space = 0;
 336   int ret = 1;
 337
 338   while (buffer->cur != buffer->rlimit)
 339     {
 340       prev_space = is_space(c);
 341       c = *buffer->cur++;
 342
 343       if (prev_space && c == '_')
 344         {
 345           if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
 346             strncmp((char *)buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
 347             {
 348               buffer->cur += _ENDASM_LEN;
 349               ret = 0;
 350               break;
 351             }
 352         }
 353       else if (c == '\n')
 354         {
 355           unsigned int cols;
 356           --buffer->cur;
 357           _cpp_process_line_notes (pfile, true);
 358           if (buffer->next_line >= buffer->rlimit)
 359             return true;
 360           _cpp_clean_line (pfile);
 361
 362           cols = buffer->next_line - buffer->line_base;
 363           CPP_INCREMENT_LINE (pfile, cols);
 364         }
 365     }
 366
 367   _cpp_process_line_notes (pfile, true);
 368   return ret;
 369 }
 370
 371 /* Skip a C-style block comment.  We find the end of the comment by
 372    seeing if an asterisk is before every '/' we encounter.  Returns
 373    nonzero if comment terminated by EOF, zero otherwise.
 374
 375    Buffer->cur points to the initial asterisk of the comment.  */
 376 bool
 377 _cpp_skip_block_comment (cpp_reader *pfile)
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   const uchar *cur = buffer->cur;
 381   uchar c;
 382
 383   cur++;
 384   if (*cur == '/')
 385     cur++;
 386
 387   for (;;)
 388     {
 389       /* People like decorating comments with '*', so check for '/'
 390          instead for efficiency.  */
 391       c = *cur++;
 392
 393       if (c == '/')
 394         {
 395           if (cur[-2] == '*')
 396             break;
 397
 398           /* Warn about potential nested comments, but not if the '/'
 399              comes immediately before the true comment delimiter.
 400              Don't bother to get it right across escaped newlines.  */
 401           if (CPP_OPTION (pfile, warn_comments)
 402               && cur[0] == '*' && cur[1] != '/')
 403             {
 404               buffer->cur = cur;
 405               cpp_error_with_line (pfile, CPP_DL_WARNING,
 406                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 407                                    "\"/*\" within comment");
 408             }
 409         }
 410       else if (c == '\n')
 411         {
 412           unsigned int cols;
 413           buffer->cur = cur - 1;
 414           _cpp_process_line_notes (pfile, true);
 415           if (buffer->next_line >= buffer->rlimit)
 416             return true;
 417           _cpp_clean_line (pfile);
 418
 419           cols = buffer->next_line - buffer->line_base;
 420           CPP_INCREMENT_LINE (pfile, cols);
 421
 422           cur = buffer->cur;
 423         }
 424     }
 425
 426   buffer->cur = cur;
 427   _cpp_process_line_notes (pfile, true);
 428   return false;
 429 }
 430
 431 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 432    terminating newline.  Handles escaped newlines.  Returns nonzero
 433    if a multiline comment.  */
 434 static int
 435 skip_line_comment (cpp_reader *pfile)
 436 {
 437   cpp_buffer *buffer = pfile->buffer;
 438   source_location orig_line = pfile->line_table->highest_line;
 439
 440   while (*buffer->cur != '\n')
 441     buffer->cur++;
 442
 443   _cpp_process_line_notes (pfile, true);
 444   return orig_line != pfile->line_table->highest_line;
 445 }
 446
 447 /* Skips whitespace, saving the next non-whitespace character.  */
 448 static void
 449 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 450 {
 451   cpp_buffer *buffer = pfile->buffer;
 452   bool saw_NUL = false;
 453
 454   do
 455     {
 456       /* Horizontal space always OK.  */
 457       if (c == ' ' || c == '\t')
 458         ;
 459       /* Just \f \v or \0 left.  */
 460       else if (c == '\0')
 461         saw_NUL = true;
 462       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 463         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 464                              CPP_BUF_COL (buffer),
 465                              "%s in preprocessing directive",
 466                              c == '\f' ? "form feed" : "vertical tab");
 467
 468       c = *buffer->cur++;
 469     }
 470   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 471   while (is_nvspace (c));
 472
 473   if (saw_NUL)
 474     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 475
 476   buffer->cur--;
 477 }
 478
 479 /* See if the characters of a number token are valid in a name (no
 480    '.', '+' or '-').  */
 481 static int
 482 name_p (cpp_reader *pfile, const cpp_string *string)
 483 {
 484   unsigned int i;
 485
 486   for (i = 0; i < string->len; i++)
 487     if (!is_idchar (string->text[i]))
 488       return 0;
 489
 490   return 1;
 491 }
 492
 493 /* After parsing an identifier or other sequence, produce a warning about
 494    sequences not in NFC/NFKC.  */
 495 static void
 496 warn_about_normalization (cpp_reader *pfile,
 497                           const cpp_token *token,
 498                           const struct normalize_state *s)
 499 {
 500   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 501       && !pfile->state.skipping)
 502     {
 503       /* Make sure that the token is printed using UCNs, even
 504          if we'd otherwise happily print UTF-8.  */
 505       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 506       size_t sz;
 507
 508       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 509       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 510         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 511                              "`%.*s' is not in NFKC", (int) sz, buf);
 512       else
 513         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 514                              "`%.*s' is not in NFC", (int) sz, buf);
 515     }
 516 }
 517
 518 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 519    an identifier.  FIRST is TRUE if this starts an identifier.  */
 520 static bool
 521 forms_identifier_p (cpp_reader *pfile, int first,
 522                     struct normalize_state *state)
 523 {
 524   cpp_buffer *buffer = pfile->buffer;
 525
 526   if (*buffer->cur == '$')
 527     {
 528       if (!CPP_OPTION (pfile, dollars_in_ident))
 529         return false;
 530
 531       buffer->cur++;
 532       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 533         {
 534           CPP_OPTION (pfile, warn_dollars) = 0;
 535           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 536         }
 537
 538       return true;
 539     }
 540
 541   /* Is this a syntactically valid UCN?  */
 542   if (CPP_OPTION (pfile, extended_identifiers)
 543       && *buffer->cur == '\\'
 544       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 545     {
 546       buffer->cur += 2;
 547       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 548                           state))
 549         return true;
 550       buffer->cur -= 2;
 551     }
 552
 553   return false;
 554 }
 555
 556 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 557 static cpp_hashnode *
 558 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
 559                 struct normalize_state *nst)
 560 {
 561   cpp_hashnode *result;
 562   const uchar *cur;
 563   unsigned int len;
 564   unsigned int hash = HT_HASHSTEP (0, *base);
 565
 566   cur = pfile->buffer->cur;
 567   if (! starts_ucn)
 568     while (ISIDNUM (*cur))
 569       {
 570         hash = HT_HASHSTEP (hash, *cur);
 571         cur++;
 572       }
 573   pfile->buffer->cur = cur;
 574   if (starts_ucn || forms_identifier_p (pfile, false, nst))
 575     {
 576       /* Slower version for identifiers containing UCNs (or $).  */
 577       do {
 578         while (ISIDNUM (*pfile->buffer->cur))
 579           {
 580             pfile->buffer->cur++;
 581             NORMALIZE_STATE_UPDATE_IDNUM (nst);
 582           }
 583       } while (forms_identifier_p (pfile, false, nst));
 584       result = _cpp_interpret_identifier (pfile, base,
 585                                           pfile->buffer->cur - base);
 586     }
 587   else
 588     {
 589       len = cur - base;
 590       hash = HT_HASHFINISH (hash, len);
 591
 592       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 593                                                   base, len, hash, HT_ALLOC));
 594     }
 595
 596   /* Rarely, identifiers require diagnostics when lexed.  */
 597   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 598                         && !pfile->state.skipping, 0))
 599     {
 600       /* It is allowed to poison the same identifier twice.  */
 601       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 602         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 603                    NODE_NAME (result));
 604
 605       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 606          replacement list of a variadic macro.  */
 607       if (result == pfile->spec_nodes.n__VA_ARGS__
 608           && !pfile->state.va_args_ok)
 609         cpp_error (pfile, CPP_DL_PEDWARN,
 610                    "__VA_ARGS__ can only appear in the expansion"
 611                    " of a C99 variadic macro");
 612     }
 613
 614   return result;
 615 }
 616
 617 /* SDCC specific */
 618 /* Pedantic parse a number, beginning with character C, skipping embedded
 619    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 620    before C.  Place the result in NUMBER.  */
 621 static void
 622 pedantic_lex_number (cpp_reader *pfile, cpp_string *number)
 623 {
 624 #define get_effective_char(pfile) (*pfile->buffer->cur++)
 625 #define BACKUP() (--pfile->buffer->cur)
 626
 627   enum num_type_e { NT_DEC, NT_HEX, NT_BIN } num_type = NT_DEC;
 628   enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
 629
 630   uchar c = *(pfile->buffer->cur - 1);
 631   struct obstack *stack = &pfile->hash_table->stack;
 632   int len = 0;
 633   int has_whole = 0;
 634   int has_fract = 0;
 635
 636   if ('.' == c)
 637     {
 638       num_part = NP_FRACT;
 639       ++len;
 640       obstack_1grow (stack, '.');
 641       c = get_effective_char (pfile);
 642     }
 643   else
 644     {
 645       if ('0' == c)
 646         {
 647           has_whole = 1;
 648           ++len;
 649           obstack_1grow (stack, c);
 650           c = get_effective_char (pfile);
 651
 652           switch (c)
 653             {
 654             case 'X':
 655             case 'x':
 656               num_type = NT_HEX;
 657               ++len;
 658               obstack_1grow (stack, c);
 659               c = get_effective_char (pfile);
 660               break;
 661
 662             case 'B':
 663             case 'b':
 664               if (!CPP_OPTION (pfile, std))
 665                 {
 666                   num_type = NT_BIN;
 667                   ++len;
 668                   obstack_1grow (stack, c);
 669                   c = get_effective_char (pfile);
 670                 }
 671               break;
 672
 673             case '.':
 674               num_part = NP_FRACT;
 675               ++len;
 676               obstack_1grow (stack, c);
 677               c = get_effective_char (pfile);
 678               break;
 679             }
 680         }
 681     }
 682
 683   for (; ; )
 684     {
 685       switch (num_part)
 686         {
 687         case NP_WHOLE:
 688           if (NT_DEC == num_type)
 689             {
 690               while (ISDIGIT (c))
 691                 {
 692                   has_whole = 1;
 693                   ++len;
 694                   obstack_1grow (stack, c);
 695                   c = get_effective_char (pfile);
 696                 }
 697
 698               if ('.' == c)
 699                 {
 700                   num_part = NP_FRACT;
 701                   ++len;
 702                   obstack_1grow (stack, c);
 703                   c = get_effective_char (pfile);
 704                   continue;
 705                 }
 706               else if ('E' == c || 'e' == c)
 707                 {
 708                   if (has_whole || has_fract)
 709                   {
 710                     num_part = NP_EXP;
 711                     ++len;
 712                     obstack_1grow (stack, c);
 713                     c = get_effective_char (pfile);
 714                     continue;
 715                   }
 716                   else
 717                     break;
 718                 }
 719             }
 720           else if (NT_HEX == num_type)
 721             {
 722               while (ISXDIGIT (c))
 723                 {
 724                   has_whole = 1;
 725                   ++len;
 726                   obstack_1grow (stack, c);
 727                   c = get_effective_char (pfile);
 728                 }
 729
 730               if ('.' == c)
 731                 {
 732                   num_part = NP_FRACT;
 733                   ++len;
 734                   obstack_1grow (stack, c);
 735                   c = get_effective_char (pfile);
 736                   continue;
 737                 }
 738               else if ('P' == c || 'p' == c)
 739                 {
 740                   if (has_whole || has_fract)
 741                     {
 742                       num_part = NP_EXP;
 743                       ++len;
 744                       obstack_1grow (stack, c);
 745                       c = get_effective_char (pfile);
 746                       continue;
 747                     }
 748                   else
 749                     break;
 750                 }
 751             }
 752           else /* (NT_BIN == num_type) */
 753             {
 754               while ((c=='0') || (c=='1'))
 755                 {
 756                   has_whole = 1;
 757                   ++len;
 758                   obstack_1grow (stack, c);
 759                   c = get_effective_char (pfile);
 760                 }
 761
 762               if ('.' == c)
 763                 {
 764                   num_part = NP_FRACT;
 765                   ++len;
 766                   obstack_1grow (stack, c);
 767                   c = get_effective_char (pfile);
 768                   continue;
 769                 }
 770               else if ('P' == c || 'p' == c)
 771                 {
 772                   if (has_whole || has_fract)
 773                     {
 774                       num_part = NP_EXP;
 775                       ++len;
 776                       obstack_1grow (stack, c);
 777                       c = get_effective_char (pfile);
 778                       continue;
 779                     }
 780                   else
 781                     break;
 782                 }
 783             }
 784           num_part = NP_INT_SUFFIX;
 785           continue;
 786
 787         case NP_FRACT:
 788           if (NT_DEC == num_type)
 789             {
 790               while (ISDIGIT (c))
 791                 {
 792                   has_fract = 1;
 793                   ++len;
 794                   obstack_1grow (stack, c);
 795                   c = get_effective_char (pfile);
 796                 }
 797
 798               if ('E' == c || 'e' == c)
 799                 {
 800                   if (has_whole || has_fract)
 801                     {
 802                       num_part = NP_EXP;
 803                       ++len;
 804                       obstack_1grow (stack, c);
 805                       c = get_effective_char (pfile);
 806                       continue;
 807                     }
 808                 }
 809             }
 810           else
 811             {
 812               while (ISXDIGIT (c))
 813                 {
 814                   has_fract = 1;
 815                   ++len;
 816                   obstack_1grow (stack, c);
 817                   c = get_effective_char (pfile);
 818                 }
 819
 820               if ('P' == c || 'p' == c)
 821                 {
 822                   if (has_whole || has_fract)
 823                     {
 824                       num_part = NP_EXP;
 825                       ++len;
 826                       obstack_1grow (stack, c);
 827                       c = get_effective_char (pfile);
 828                       continue;
 829                     }
 830                 }
 831             }
 832           num_part = NP_FLOAT_SUFFIX;
 833           continue;
 834
 835         case NP_EXP:
 836           if ('+' == c || '-' == c)
 837             {
 838               ++len;
 839               obstack_1grow (stack, c);
 840               c = get_effective_char (pfile);
 841             }
 842
 843           while (ISDIGIT (c))
 844             {
 845               ++len;
 846               obstack_1grow (stack, c);
 847               c = get_effective_char (pfile);
 848             }
 849
 850           num_part = NP_FLOAT_SUFFIX;
 851           continue;
 852
 853         case NP_INT_SUFFIX:
 854            if ('L' == c || 'l' == c)
 855             {
 856               uchar prevc = c;
 857
 858               ++len;
 859               obstack_1grow (stack, c);
 860               c = get_effective_char (pfile);
 861
 862               if (c == prevc)
 863                 {
 864                   ++len;
 865                   obstack_1grow (stack, c);
 866                   c = get_effective_char (pfile);
 867                 }
 868             }
 869           else if ('U' == c || 'u' == c)
 870             {
 871               ++len;
 872               obstack_1grow (stack, c);
 873               c = get_effective_char (pfile);
 874             }
 875           break;
 876
 877         case NP_FLOAT_SUFFIX:
 878            if ('F' == c || 'f' == c)
 879             {
 880               ++len;
 881               obstack_1grow (stack, c);
 882               c = get_effective_char (pfile);
 883             }
 884           else if ('L' == c || 'l' == c)
 885             {
 886               ++len;
 887               obstack_1grow (stack, c);
 888               c = get_effective_char (pfile);
 889             }
 890           break;
 891         }
 892       break;
 893     }
 894
 895   /* Step back over the unwanted char.  */
 896   BACKUP ();
 897
 898   number->text = obstack_finish (stack);
 899   number->len = len;
 900 }
 901
 902 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 903 static void
 904 lex_number (cpp_reader *pfile, cpp_string *number,
 905             struct normalize_state *nst)
 906 {
 907   const uchar *cur;
 908   const uchar *base;
 909   uchar *dest;
 910
 911   base = pfile->buffer->cur - 1;
 912   do
 913     {
 914       cur = pfile->buffer->cur;
 915
 916       /* N.B. ISIDNUM does not include $.  */
 917       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 918         {
 919           cur++;
 920           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 921         }
 922
 923       pfile->buffer->cur = cur;
 924     }
 925   while (forms_identifier_p (pfile, false, nst));
 926
 927   number->len = cur - base;
 928   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 929   memcpy (dest, base, number->len);
 930   dest[number->len] = '\0';
 931   number->text = dest;
 932 }
 933
 934 /* Create a token of type TYPE with a literal spelling.  */
 935 static void
 936 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 937                 unsigned int len, enum cpp_ttype type)
 938 {
 939   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 940
 941   memcpy (dest, base, len);
 942   dest[len] = '\0';
 943   token->type = type;
 944   token->val.str.len = len;
 945   token->val.str.text = dest;
 946 }
 947
 948 /* Lexes a string, character constant, or angle-bracketed header file
 949    name.  The stored string contains the spelling, including opening
 950    quote and leading any leading 'L', 'u' or 'U'.  It returns the type
 951    of the literal, or CPP_OTHER if it was not properly terminated, or
 952    CPP_LESS for an unterminated header name which must be relexed as
 953    normal tokens.
 954
 955    The spelling is NUL-terminated, but it is not guaranteed that this
 956    is the first NUL since embedded NULs are preserved.  */
 957 static void
 958 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 959 {
 960   bool saw_NUL = false;
 961   const uchar *cur;
 962   cppchar_t terminator;
 963   enum cpp_ttype type;
 964
 965   cur = base;
 966   terminator = *cur++;
 967   if (terminator == 'L' || terminator == 'u' || terminator == 'U')
 968     terminator = *cur++;
 969   if (terminator == '\"')
 970     type = (*base == 'L' ? CPP_WSTRING :
 971             *base == 'U' ? CPP_STRING32 :
 972             *base == 'u' ? CPP_STRING16 : CPP_STRING);
 973   else if (terminator == '\'')
 974     type = (*base == 'L' ? CPP_WCHAR :
 975             *base == 'U' ? CPP_CHAR32 :
 976             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
 977   else
 978     terminator = '>', type = CPP_HEADER_NAME;
 979
 980   for (;;)
 981     {
 982       cppchar_t c = *cur++;
 983
 984       /* In #include-style directives, terminators are not escapable.  */
 985       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 986         cur++;
 987       else if (c == terminator)
 988         break;
 989       else if (c == '\n')
 990         {
 991           cur--;
 992           /* Unmatched quotes always yield undefined behavior, but
 993              greedy lexing means that what appears to be an unterminated
 994              header name may actually be a legitimate sequence of tokens.  */
 995           if (terminator == '>')
 996             {
 997               token->type = CPP_LESS;
 998               return;
 999             }
1000           type = CPP_OTHER;
1001           break;
1002         }
1003       else if (c == '\0')
1004         saw_NUL = true;
1005     }
1006
1007   if (saw_NUL && !pfile->state.skipping)
1008     cpp_error (pfile, CPP_DL_WARNING,
1009                "null character(s) preserved in literal");
1010
1011   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1012     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1013                (int) terminator);
1014
1015   pfile->buffer->cur = cur;
1016   create_literal (pfile, token, base, cur - base, type);
1017 }
1018
1019 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
1020    comment blocks (when executed with -C option) and
1021    _asm (SDCPP specific) blocks */
1022
/* Walk the LEN characters starting at SRC and return how many of
   them are not carriage returns.  When DEST is non-NULL the non-CR
   characters are also stored consecutively starting at DEST; when
   DEST is NULL the characters are only counted.  CRs are dropped
   because they are generated automatically when the output is opened
   in TEXT mode.  A NUL character within the range trips an
   assertion.  */
static unsigned int
copy_text_chars (unsigned char *dest, const unsigned char *src, unsigned int len)
{
  unsigned int kept = 0;
  unsigned int i;

  for (i = 0; i < len; i++)
    {
      const unsigned char ch = src[i];

      assert (ch != '\0');

      if (ch == '\r')
        continue;

      if (dest != NULL)
        dest[kept] = ch;
      kept++;
    }

  return kept;
}
1046
1047 /* SDCC _asm specific */
1048 /* The stored comment includes the comment start and any terminator.  */
1049 static void
1050 save_asm (cpp_reader *pfile, cpp_token *token, const unsigned char *from)
1051 {
1052 #define _ASM_STR  "_asm"
1053 #define _ASM_LEN  ((sizeof _ASM_STR) - 1)
1054
1055   unsigned char *buffer;
1056   unsigned int text_len, len;
1057
1058   len = pfile->buffer->cur - from;
1059   /* + _ASM_LEN for the initial '_asm'.  */
1060   text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
1061   buffer = _cpp_unaligned_alloc (pfile, text_len);
1062
1063
1064   token->type = CPP_ASM;
1065   token->val.str.len = text_len;
1066   token->val.str.text = buffer;
1067
1068   memcpy (buffer, _ASM_STR, _ASM_LEN);
1069   copy_text_chars (buffer + _ASM_LEN, from, len);
1070 }
1071
1072 /* Return the comment table. The client may not make any assumption
1073    about the ordering of the table.  */
1074 cpp_comment_table *
1075 cpp_get_comments (cpp_reader *pfile)
1076 {
1077   return &pfile->comments;
1078 }
1079
1080 /* Append a comment to the end of the comment table. */
1081 static void
1082 store_comment (cpp_reader *pfile, cpp_token *token)
1083 {
1084   int len;
1085
1086   if (pfile->comments.allocated == 0)
1087     {
1088       pfile->comments.allocated = 256;
1089       pfile->comments.entries = (cpp_comment *) xmalloc
1090         (pfile->comments.allocated * sizeof (cpp_comment));
1091     }
1092
1093   if (pfile->comments.count == pfile->comments.allocated)
1094     {
1095       pfile->comments.allocated *= 2;
1096       pfile->comments.entries = (cpp_comment *) xrealloc
1097         (pfile->comments.entries,
1098          pfile->comments.allocated * sizeof (cpp_comment));
1099     }
1100
1101   len = token->val.str.len;
1102
1103   /* Copy comment. Note, token may not be NULL terminated. */
1104   pfile->comments.entries[pfile->comments.count].comment =
1105     (char *) xmalloc (sizeof (char) * (len + 1));
1106   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1107           token->val.str.text, len);
1108   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1109
1110   /* Set source location. */
1111   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1112
1113   /* Increment the count of entries in the comment table. */
1114   pfile->comments.count++;
1115 }
1116
1117 /* The stored comment includes the comment start and any terminator.  */
1118 static void
1119 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1120               cppchar_t type)
1121 {
1122   unsigned char *buffer;
1123   unsigned int len, clen;
1124
1125   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1126
1127   /* C++ comments probably (not definitely) have moved past a new
1128      line, which we don't want to save in the comment.  */
1129   if (is_vspace (pfile->buffer->cur[-1]))
1130     len--;
1131
1132   /* If we are currently in a directive, then we need to store all
1133      C++ comments as C comments internally, and so we need to
1134      allocate a little extra space in that case.
1135
1136      Note that the only time we encounter a directive here is
1137      when we are saving comments in a "#define".  */
1138   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1139
1140   buffer = _cpp_unaligned_alloc (pfile, clen);
1141
1142   token->type = CPP_COMMENT;
1143   token->val.str.len = clen;
1144   token->val.str.text = buffer;
1145
1146   buffer[0] = '/';
1147   copy_text_chars (buffer + 1, from, len);
1148
1149   /* Finish conversion to a C comment, if necessary.  */
1150   if (pfile->state.in_directive && type == '/')
1151     {
1152       buffer[1] = '*';
1153       buffer[clen - 2] = '*';
1154       buffer[clen - 1] = '/';
1155     }
1156
1157   /* Finally store this comment for use by clients of libcpp. */
1158   store_comment (pfile, token);
1159 }
1160
1161 /* Allocate COUNT tokens for RUN.  */
1162 void
1163 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1164 {
1165   run->base = XNEWVEC (cpp_token, count);
1166   run->limit = run->base + count;
1167   run->next = NULL;
1168 }
1169
1170 /* Returns the next tokenrun, or creates one if there is none.  */
1171 static tokenrun *
1172 next_tokenrun (tokenrun *run)
1173 {
1174   if (run->next == NULL)
1175     {
1176       run->next = XNEW (tokenrun);
1177       run->next->prev = run;
1178       _cpp_init_tokenrun (run->next, 250);
1179     }
1180
1181   return run->next;
1182 }
1183
1184 /* Look ahead in the input stream.  */
1185 const cpp_token *
1186 cpp_peek_token (cpp_reader *pfile, int index)
1187 {
1188   cpp_context *context = pfile->context;
1189   const cpp_token *peektok;
1190   int count;
1191
1192   /* First, scan through any pending cpp_context objects.  */
1193   while (context->prev)
1194     {
1195       ptrdiff_t sz = (context->direct_p
1196                       ? LAST (context).token - FIRST (context).token
1197                       : LAST (context).ptoken - FIRST (context).ptoken);
1198
1199       if (index < (int) sz)
1200         return (context->direct_p
1201                 ? FIRST (context).token + index
1202                 : *(FIRST (context).ptoken + index));
1203
1204       index -= (int) sz;
1205       context = context->prev;
1206     }
1207
1208   /* We will have to read some new tokens after all (and do so
1209      without invalidating preceding tokens).  */
1210   count = index;
1211   pfile->keep_tokens++;
1212
1213   do
1214     {
1215       peektok = _cpp_lex_token (pfile);
1216       if (peektok->type == CPP_EOF)
1217         return peektok;
1218     }
1219   while (index--);
1220
1221   _cpp_backup_tokens_direct (pfile, count + 1);
1222   pfile->keep_tokens--;
1223
1224   return peektok;
1225 }
1226
/* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.

   The token is taken from the slot at PFILE->cur_token, which is then
   advanced.  If lookahead tokens are pending (PFILE->lookaheads is
   non-zero) they are first shifted up by one slot, spilling into the
   next token run when necessary, so that they are not overwritten.  */
cpp_token *
_cpp_temp_token (cpp_reader *pfile)
{
  cpp_token *old, *result;
  /* Slots left in the current run, counting from cur_token.  */
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
  /* Number of pending lookahead tokens.  */
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;

  /* The token just before cur_token; its location is copied to the
     new token below.  */
  old = pfile->cur_token - 1;
  /* Any pre-existing lookaheads must not be clobbered.  */
  if (la)
    {
      /* The lookaheads reach (at least) the end of the current run,
         so shifting them by one needs room in the next run.  */
      if (sz <= la)
        {
          tokenrun *next = next_tokenrun (pfile->cur_run);

          /* Shift the part already living in the next run.  */
          if (sz < la)
            memmove (next->base + 1, next->base,
                     (la - sz) * sizeof (cpp_token));

          /* The last token of the current run moves to the start of
             the next one.  */
          next->base[0] = pfile->cur_run->limit[-1];
        }

      /* Shift the part living in the current run; memmove is used
         because source and destination overlap.  */
      if (sz > 1)
        memmove (pfile->cur_token + 1, pfile->cur_token,
                 MIN (la, sz - 1) * sizeof (cpp_token));
    }

  /* The current run is exhausted: continue in the next one.  */
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
    {
      pfile->cur_run = next_tokenrun (pfile->cur_run);
      pfile->cur_token = pfile->cur_run->base;
    }

  result = pfile->cur_token++;
  result->src_loc = old->src_loc;
  return result;
}
1268
/* Lex a token into RESULT (external interface).  Takes care of issues
   like directive handling, token lookahead, multiple include
   optimization and skipping.

   Returns a pointer to the token: either a slot in the current token
   run or PFILE->directive_result.  While PFILE->state.skipping is set
   (and we are not inside a directive or deferred pragma) tokens are
   discarded until it clears or CPP_EOF is reached.  */
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *result;

  for (;;)
    {
      /* Move on to the next token run when the current one is full.  */
      if (pfile->cur_token == pfile->cur_run->limit)
        {
          pfile->cur_run = next_tokenrun (pfile->cur_run);
          pfile->cur_token = pfile->cur_run->base;
        }
      /* We assume that the current token is somewhere in the current
         run.  */
      if (pfile->cur_token < pfile->cur_run->base
          || pfile->cur_token >= pfile->cur_run->limit)
        abort ();

      /* A pending lookahead token is already sitting at cur_token;
         hand it out again instead of lexing a new one.  */
      if (pfile->lookaheads)
        {
          pfile->lookaheads--;
          result = pfile->cur_token++;
        }
      else
        result = _cpp_lex_direct (pfile);

      /* BOL marks the first token on a line.  */
      if (result->flags & BOL)
        {
          /* Is this a directive.  If _cpp_handle_directive returns
             false, it is an assembler #.  */
          if (result->type == CPP_HASH
              /* 6.10.3 p 11: Directives in a list of macro arguments
                 gives undefined behavior.  This implementation
                 handles the directive as normal.  */
              && pfile->state.parsing_args != 1)
            {
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
                {
                  /* A CPP_PADDING directive result means there is
                     nothing to return for this directive; lex the
                     next token.  */
                  if (pfile->directive_result.type == CPP_PADDING)
                    continue;
                  result = &pfile->directive_result;
                }
            }
          else if (pfile->state.in_deferred_pragma)
            result = &pfile->directive_result;

          /* Tell the client about the new line, unless skipping.  */
          if (pfile->cb.line_change && !pfile->state.skipping)
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
        }

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
        break;

      /* Outside a directive, invalidate controlling macros.  At file
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
         get here and MI optimization works.  */
      pfile->mi_valid = false;

      if (!pfile->state.skipping || result->type == CPP_EOF)
        break;
    }

  return result;
}
1337
1338 /* Returns true if a fresh line has been loaded.  */
1339 bool
1340 _cpp_get_fresh_line (cpp_reader *pfile)
1341 {
1342   int return_at_eof;
1343
1344   /* We can't get a new line until we leave the current directive.  */
1345   if (pfile->state.in_directive)
1346     return false;
1347
1348   for (;;)
1349     {
1350       cpp_buffer *buffer = pfile->buffer;
1351
1352       if (!buffer->need_line)
1353         return true;
1354
1355       if (buffer->next_line < buffer->rlimit)
1356         {
1357           _cpp_clean_line (pfile);
1358           return true;
1359         }
1360
1361       /* First, get out of parsing arguments state.  */
1362       if (pfile->state.parsing_args)
1363         return false;
1364
1365       /* End of buffer.  Non-empty files should end in a newline.  */
1366       if (buffer->buf != buffer->rlimit
1367           && buffer->next_line > buffer->rlimit
1368           && !buffer->from_stage3)
1369         {
1370           /* Clip to buffer size.  */
1371           buffer->next_line = buffer->rlimit;
1372         }
1373
1374       return_at_eof = buffer->return_at_eof;
1375       _cpp_pop_buffer (pfile);
1376       if (pfile->buffer == NULL || return_at_eof)
1377         return false;
1378     }
1379 }
1380
/* Helper for two-character operators.  Expects `buffer' and `result'
   to be in scope at the point of use.  If the character at
   buffer->cur is CHAR, consume it and set result->type to THEN_TYPE;
   otherwise leave buffer->cur alone and set result->type to
   ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = (THEN_TYPE);                   \
        }                                               \
      else                                              \
        result->type = (ELSE_TYPE);                     \
    }                                                   \
  while (0)
1389
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is a single switch on the first character of the token,
   entered through three labels: `fresh_line' (a new line may have to
   be loaded), `update_tokens_line' (refresh the token's source
   location) and `skipped_white' (read the next character).  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The end of a deferred pragma's line is reported as its own
         token.  */
      if (pfile->state.in_deferred_pragma)
        {
          result->type = CPP_PRAGMA_EOL;
          pfile->state.in_deferred_pragma = false;
          if (!pfile->state.pragma_allow_expansion)
            pfile->state.prevent_expansion--;
          return result;
        }
      /* No further line could be loaded: report end of input.  */
      if (!_cpp_get_fresh_line (pfile))
        {
          result->type = CPP_EOF;
          if (!pfile->state.in_directive)
            {
              /* Tell the compiler the line number of the EOF token.  */
              result->src_loc = pfile->line_table->highest_line;
              result->flags = BOL;
            }
          return result;
        }
      /* Unless tokens must be kept, start over at the beginning of
         the base token run.  */
      if (!pfile->keep_tokens)
        {
          pfile->cur_run = &pfile->base_run;
          result = pfile->base_run.base;
          pfile->cur_token = result + 1;
        }
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
        result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have changed the current buffer.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

 skipped_white:
  /* Process any line notes that have been reached.  */
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
                               CPP_BUF_COLUMN (buffer, buffer->cur));

  switch (c)
    {
    /* Horizontal whitespace (and NUL): flag it and keep scanning.  */
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    /* End of line: a new line is needed before lexing can go on.  */
    case '\n':
      if (buffer->cur < buffer->rlimit)
        CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->type = CPP_NUMBER;
        if (CPP_OPTION (pfile, pedantic_parse_number))
          pedantic_lex_number (pfile, &result->val.str);
        else
          lex_number (pfile, &result->val.str, &nst);
        warn_about_normalization (pfile, result, &nst);
        break;
      }

    case 'L':
    case 'u':
    case 'U':
      /* 'L', 'u' or 'U' may introduce wide characters or strings.  */
      if (c == 'L' || CPP_OPTION (pfile, uliterals))
        {
          if (*buffer->cur == '\'' || *buffer->cur == '"')
            {
              lex_string (pfile, result, buffer->cur - 1);
              break;
            }
        }
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
                                           &nst);
        warn_about_normalization (pfile, result, &nst);
      }

      /* SDCC _asm specific */
      /* handle _asm ... _endasm ;  */
      if (CPP_OPTION (pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
        {
          comment_start = buffer->cur;
          result->type = CPP_ASM;
          skip_asm_block (pfile);
          /* Save the _asm block as a token in its own right.  */
          save_asm (pfile, result, comment_start);
        }
      /* Convert named operators to their proper types.  */
      else if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = (enum cpp_ttype) result->val.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
        {
          if (_cpp_skip_block_comment (pfile))
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || cpp_in_system_header (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          buffer->cur++;
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches above reach this point.  When
         comments are not being saved, the comment counts as
         whitespace and lexing continues with the next token.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* When a header name is allowed here, try that first;
         lex_string leaves CPP_LESS in the token type if the name was
         unterminated, in which case '<' is lexed as an operator.  */
      if (pfile->state.angled_headers)
        {
          lex_string (pfile, result, buffer->cur - 1);
          if (result->type != CPP_LESS)
            break;
        }

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_SQUARE;
            }
          else if (*buffer->cur == '%')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_BRACE;
            }
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
        }
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_HASH;
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
                buffer->cur += 2, result->type = CPP_PASTE;
            }
          else if (*buffer->cur == '>')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_CLOSE_BRACE;
            }
        }
      break;

    case '.':
      result->type = CPP_DOT;
      /* A dot followed by a digit starts a number.  */
      if (ISDIGIT (*buffer->cur))
        {
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
          result->type = CPP_NUMBER;
          if (CPP_OPTION (pfile, pedantic_parse_number))
            pedantic_lex_number (pfile, &result->val.str);
          else
            lex_number (pfile, &result->val.str, &nst);
          warn_about_normalization (pfile, result, &nst);
        }
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
        buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
        {
          buffer->cur++;
          result->type = CPP_DEREF;
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
            buffer->cur++, result->type = CPP_DEREF_STAR;
        }
      else if (*buffer->cur == '-')
        buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
        buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
        buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
        {
          buffer->cur++;
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
        /* Back up over the character and let forms_identifier_p
           decide whether an identifier starts here.  */
        const uchar *base = --buffer->cur;
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        if (forms_identifier_p (pfile, true, &nst))
          {
            result->type = CPP_NAME;
            result->val.node = lex_identifier (pfile, base, true, &nst);
            warn_about_normalization (pfile, result, &nst);
            break;
          }
        /* Not an identifier: step over the character again.  */
        buffer->cur++;
      }
      /* Fall through.  */

    default:
      /* Any other character becomes a one-character CPP_OTHER
         token.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
1770
1771 /* An upper bound on the number of bytes needed to spell TOKEN.
1772    Does not include preceding whitespace.  */
1773 unsigned int
1774 cpp_token_len (const cpp_token *token)
1775 {
1776   unsigned int len;
1777
1778   switch (TOKEN_SPELL (token))
1779     {
1780     default:            len = 6;                                break;
1781     case SPELL_LITERAL: len = token->val.str.len;               break;
1782     case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1783     }
1784
1785   return len;
1786 }
1787
/* Decode the UTF-8 sequence that starts at NAME and write the
   corresponding \U escape (a backslash, 'U' and eight lower-case hex
   digits) to BUFFER.  Returns the number of bytes read out of NAME.
   (There are always 10 bytes written to BUFFER.)  Aborts if a
   continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the UTF-8 sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The remaining bits of the first byte are payload.  */
  code_point = *name & (0x7F >> seq_len);

  for (k = 1; k < seq_len; k++)
    {
      const unsigned char cont = *++name;

      /* Ill-formed UTF-8.  */
      if ((cont & ~0x3F) != 0x80)
        abort ();

      /* Each continuation byte contributes six more bits.  */
      code_point = (code_point << 6) | (cont & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1821
1822
1823 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1824    already contain the enough space to hold the token's spelling.
1825    Returns a pointer to the character after the last character written.
1826    FORSTRING is true if this is to be the spelling after translation
1827    phase 1 (this is different for UCNs).
1828    FIXME: Would be nice if we didn't need the PFILE argument.  */
1829 unsigned char *
1830 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1831                  unsigned char *buffer, bool forstring)
1832 {
1833   switch (TOKEN_SPELL (token))
1834     {
1835     case SPELL_OPERATOR:
1836       {
1837         const unsigned char *spelling;
1838         unsigned char c;
1839
1840         if (token->flags & DIGRAPH)
1841           spelling
1842             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1843         else if (token->flags & NAMED_OP)
1844           goto spell_ident;
1845         else
1846           spelling = TOKEN_NAME (token);
1847
1848         while ((c = *spelling++) != '\0')
1849           *buffer++ = c;
1850       }
1851       break;
1852
1853     spell_ident:
1854     case SPELL_IDENT:
1855       if (forstring)
1856         {
1857           memcpy (buffer, NODE_NAME (token->val.node),
1858                   NODE_LEN (token->val.node));
1859           buffer += NODE_LEN (token->val.node);
1860         }
1861       else
1862         {
1863           size_t i;
1864           const unsigned char * name = NODE_NAME (token->val.node);
1865
1866           for (i = 0; i < NODE_LEN (token->val.node); i++)
1867             if (name[i] & ~0x7F)
1868               {
1869                 i += utf8_to_ucn (buffer, name + i) - 1;
1870                 buffer += 10;
1871               }
1872             else
1873               *buffer++ = NODE_NAME (token->val.node)[i];
1874         }
1875       break;
1876
1877     case SPELL_LITERAL:
1878       memcpy (buffer, token->val.str.text, token->val.str.len);
1879       buffer += token->val.str.len;
1880       break;
1881
1882     case SPELL_NONE:
1883       cpp_error (pfile, CPP_DL_ICE,
1884                  "unspellable token %s", TOKEN_NAME (token));
1885       break;
1886     }
1887
1888   return buffer;
1889 }
1890
1891 /* Returns TOKEN spelt as a null-terminated string.  The string is
1892    freed when the reader is destroyed.  Useful for diagnostics.  */
1893 unsigned char *
1894 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1895 {
1896   unsigned int len = cpp_token_len (token) + 1;
1897   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1898
1899   end = cpp_spell_token (pfile, token, start, false);
1900   end[0] = '\0';
1901
1902   return start;
1903 }
1904
1905 /* Used by C front ends, which really should move to using
1906    cpp_token_as_text.  */
1907 const char *
1908 cpp_type2name (enum cpp_ttype type)
1909 {
1910   return (const char *) token_spellings[type].name;
1911 }
1912
1913 /* Writes the spelling of token to FP, without any preceding space.
1914    Separated from cpp_spell_token for efficiency - to avoid stdio
1915    double-buffering.  */
1916 void
1917 cpp_output_token (const cpp_token *token, FILE *fp)
1918 {
1919   switch (TOKEN_SPELL (token))
1920     {
1921     case SPELL_OPERATOR:
1922       {
1923         const unsigned char *spelling;
1924         int c;
1925
1926         if (token->flags & DIGRAPH)
1927           spelling
1928             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1929         else if (token->flags & NAMED_OP)
1930           goto spell_ident;
1931         else
1932           spelling = TOKEN_NAME (token);
1933
1934         c = *spelling;
1935         do
1936           putc (c, fp);
1937         while ((c = *++spelling) != '\0');
1938       }
1939       break;
1940
1941     spell_ident:
1942     case SPELL_IDENT:
1943       {
1944         size_t i;
1945         const unsigned char * name = NODE_NAME (token->val.node);
1946
1947         for (i = 0; i < NODE_LEN (token->val.node); i++)
1948           if (name[i] & ~0x7F)
1949             {
1950               unsigned char buffer[10];
1951               i += utf8_to_ucn (buffer, name + i) - 1;
1952               fwrite (buffer, 1, 10, fp);
1953             }
1954           else
1955             fputc (NODE_NAME (token->val.node)[i], fp);
1956       }
1957       break;
1958
1959     case SPELL_LITERAL:
1960       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1961       break;
1962
1963     case SPELL_NONE:
1964       /* An error, most probably.  */
1965       break;
1966     }
1967 }
1968
1969 /* Compare two tokens.  */
1970 int
1971 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1972 {
1973   if (a->type == b->type && a->flags == b->flags)
1974     switch (TOKEN_SPELL (a))
1975       {
1976       default:                  /* Keep compiler happy.  */
1977       case SPELL_OPERATOR:
1978         return 1;
1979       case SPELL_NONE:
1980         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1981       case SPELL_IDENT:
1982         return a->val.node == b->val.node;
1983       case SPELL_LITERAL:
1984         return (a->val.str.len == b->val.str.len
1985                 && !memcmp (a->val.str.text, b->val.str.text,
1986                             a->val.str.len));
1987       }
1988
1989   return 0;
1990 }
1991
1992 /* Returns nonzero if a space should be inserted to avoid an
1993    accidental token paste for output.  For simplicity, it is
1994    conservative, and occasionally advises a space where one is not
1995    needed, e.g. "." and ".2".  */
1996 int
1997 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1998                  const cpp_token *token2)
1999 {
2000   enum cpp_ttype a = token1->type, b = token2->type;
2001   cppchar_t c;
2002
2003   if (token1->flags & NAMED_OP)
2004     a = CPP_NAME;
2005   if (token2->flags & NAMED_OP)
2006     b = CPP_NAME;
2007
2008   c = EOF;
2009   if (token2->flags & DIGRAPH)
2010     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2011   else if (token_spellings[b].category == SPELL_OPERATOR)
2012     c = token_spellings[b].name[0];
2013
2014   /* Quickly get everything that can paste with an '='.  */
2015   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2016     return 1;
2017
2018   switch (a)
2019     {
2020     case CPP_GREATER:   return c == '>';
2021     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2022     case CPP_PLUS:      return c == '+';
2023     case CPP_MINUS:     return c == '-' || c == '>';
2024     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2025     case CPP_MOD:       return c == ':' || c == '>';
2026     case CPP_AND:       return c == '&';
2027     case CPP_OR:        return c == '|';
2028     case CPP_COLON:     return c == ':' || c == '>';
2029     case CPP_DEREF:     return c == '*';
2030     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2031     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2032     case CPP_NAME:      return ((b == CPP_NUMBER
2033                                  && name_p (pfile, &token2->val.str))
2034                                 || b == CPP_NAME
2035                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
2036     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2037                                 || c == '.' || c == '+' || c == '-');
2038                                       /* UCNs */
2039     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2040                                  && b == CPP_NAME)
2041                                 || (CPP_OPTION (pfile, objc)
2042                                     && token1->val.str.text[0] == '@'
2043                                     && (b == CPP_NAME || b == CPP_STRING)));
2044     default:            break;
2045     }
2046
2047   return 0;
2048 }
2049
2050 /* Output all the remaining tokens on the current line, and a newline
2051    character, to FP.  Leading whitespace is removed.  If there are
2052    macros, special token padding is not performed.  */
2053 void
2054 cpp_output_line (cpp_reader *pfile, FILE *fp)
2055 {
2056   const cpp_token *token;
2057
2058   token = cpp_get_token (pfile);
2059   while (token->type != CPP_EOF)
2060     {
2061       cpp_output_token (token, fp);
2062       token = cpp_get_token (pfile);
2063       if (token->flags & PREV_WHITE)
2064         putc (' ', fp);
2065     }
2066
2067   putc ('\n', fp);
2068 }
2069
2070 /* Return a string representation of all the remaining tokens on the
2071    current line.  The result is allocated using xmalloc and must be
2072    freed by the caller.  */
2073 unsigned char *
2074 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2075 {
2076   const cpp_token *token;
2077   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2078   unsigned int alloced = 120 + out;
2079   unsigned char *result = (unsigned char *) xmalloc (alloced);
2080
2081   /* If DIR_NAME is empty, there are no initial contents.  */
2082   if (dir_name)
2083     {
2084       sprintf ((char *) result, "#%s ", dir_name);
2085       out += 2;
2086     }
2087
2088   token = cpp_get_token (pfile);
2089   while (token->type != CPP_EOF)
2090     {
2091       unsigned char *last;
2092       /* Include room for a possible space and the terminating nul.  */
2093       unsigned int len = cpp_token_len (token) + 2;
2094
2095       if (out + len > alloced)
2096         {
2097           alloced *= 2;
2098           if (out + len > alloced)
2099             alloced = out + len;
2100           result = (unsigned char *) xrealloc (result, alloced);
2101         }
2102
2103       last = cpp_spell_token (pfile, token, &result[out], 0);
2104       out = last - result;
2105
2106       token = cpp_get_token (pfile);
2107       if (token->flags & PREV_WHITE)
2108         result[out++] = ' ';
2109     }
2110
2111   result[out] = '\0';
2112   return result;
2113 }
2114
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2129
2130 /* Create a new allocation buffer.  Place the control block at the end
2131    of the buffer, so that buffer overflows will cause immediate chaos.  */
2132 static _cpp_buff *
2133 new_buff (size_t len)
2134 {
2135   _cpp_buff *result;
2136   unsigned char *base;
2137
2138   if (len < MIN_BUFF_SIZE)
2139     len = MIN_BUFF_SIZE;
2140   len = CPP_ALIGN (len);
2141
2142   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2143   result = (_cpp_buff *) (base + len);
2144   result->base = base;
2145   result->cur = base;
2146   result->limit = base + len;
2147   result->next = NULL;
2148   return result;
2149 }
2150
2151 /* Place a chain of unwanted allocation buffers on the free list.  */
2152 void
2153 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2154 {
2155   _cpp_buff *end = buff;
2156
2157   while (end->next)
2158     end = end->next;
2159   end->next = pfile->free_buffs;
2160   pfile->free_buffs = buff;
2161 }
2162
2163 /* Return a free buffer of size at least MIN_SIZE.  */
2164 _cpp_buff *
2165 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2166 {
2167   _cpp_buff *result, **p;
2168
2169   for (p = &pfile->free_buffs;; p = &(*p)->next)
2170     {
2171       size_t size;
2172
2173       if (*p == NULL)
2174         return new_buff (min_size);
2175       result = *p;
2176       size = result->limit - result->base;
2177       /* Return a buffer that's big enough, but don't waste one that's
2178          way too big.  */
2179       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2180         break;
2181     }
2182
2183   *p = result->next;
2184   result->next = NULL;
2185   result->cur = result->base;
2186   return result;
2187 }
2188
2189 /* Creates a new buffer with enough space to hold the uncommitted
2190    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2191    the excess bytes to the new buffer.  Chains the new buffer after
2192    BUFF, and returns the new buffer.  */
2193 _cpp_buff *
2194 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2195 {
2196   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2197   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2198
2199   buff->next = new_buff;
2200   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2201   return new_buff;
2202 }
2203
2204 /* Creates a new buffer with enough space to hold the uncommitted
2205    remaining bytes of the buffer pointed to by BUFF, and at least
2206    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2207    Chains the new buffer before the buffer pointed to by BUFF, and
2208    updates the pointer to point to the new buffer.  */
2209 void
2210 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2211 {
2212   _cpp_buff *new_buff, *old_buff = *pbuff;
2213   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2214
2215   new_buff = _cpp_get_buff (pfile, size);
2216   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2217   new_buff->next = old_buff;
2218   *pbuff = new_buff;
2219 }
2220
2221 /* Free a chain of buffers starting at BUFF.  */
2222 void
2223 _cpp_free_buff (_cpp_buff *buff)
2224 {
2225   _cpp_buff *next;
2226
2227   for (; buff; buff = next)
2228     {
2229       next = buff->next;
2230       free (buff->base);
2231     }
2232 }
2233
2234 /* Allocate permanent, unaligned storage of length LEN.  */
2235 unsigned char *
2236 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2237 {
2238   _cpp_buff *buff = pfile->u_buff;
2239   unsigned char *result = buff->cur;
2240
2241   if (len > (size_t) (buff->limit - result))
2242     {
2243       buff = _cpp_get_buff (pfile, len);
2244       buff->next = pfile->u_buff;
2245       pfile->u_buff = buff;
2246       result = buff->cur;
2247     }
2248
2249   buff->cur = result + len;
2250   return result;
2251 }
2252
2253 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2254    That buffer is used for growing allocations when saving macro
2255    replacement lists in a #define, and when parsing an answer to an
2256    assertion in #assert, #unassert or #if (and therefore possibly
2257    whilst expanding macros).  It therefore must not be used by any
2258    code that they might call: specifically the lexer and the guts of
2259    the macro expander.
2260
2261    All existing other uses clearly fit this restriction: storing
2262    registered pragmas during initialization.  */
2263 unsigned char *
2264 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2265 {
2266   _cpp_buff *buff = pfile->a_buff;
2267   unsigned char *result = buff->cur;
2268
2269   if (len > (size_t) (buff->limit - result))
2270     {
2271       buff = _cpp_get_buff (pfile, len);
2272       buff->next = pfile->a_buff;
2273       pfile->a_buff = buff;
2274       result = buff->cur;
2275     }
2276
2277   buff->cur = result + len;
2278   return result;
2279 }
2280
2281 /* Say which field of TOK is in use.  */
2282
2283 enum cpp_token_fld_kind
2284 cpp_token_val_index (cpp_token *tok)
2285 {
2286   switch (TOKEN_SPELL (tok))
2287     {
2288     case SPELL_IDENT:
2289       return CPP_TOKEN_FLD_NODE;
2290     case SPELL_LITERAL:
2291       return CPP_TOKEN_FLD_STR;
2292     case SPELL_NONE:
2293       if (tok->type == CPP_MACRO_ARG)
2294         return CPP_TOKEN_FLD_ARG_NO;
2295       else if (tok->type == CPP_PADDING)
2296         return CPP_TOKEN_FLD_SOURCE;
2297       else if (tok->type == CPP_PRAGMA)
2298         return CPP_TOKEN_FLD_PRAGMA;
2299       /* else fall through */
2300     default:
2301       return CPP_TOKEN_FLD_NONE;
2302     }
2303 }