git.gag.com Git - fw/sdcc/blob - support/cpp2/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27 #include <assert.h>
  28
  29 /* MULTIBYTE_CHARS support only works for native compilers.
  30    ??? Ideally what we want is to model widechar support after
  31    the current floating point support.  */
  32 #ifdef CROSS_COMPILE
  33 #undef MULTIBYTE_CHARS
  34 #endif
  35
  36 #ifdef MULTIBYTE_CHARS
  37 #include "mbchar.h"
  38 #include <locale.h>
  39 #endif
  40
  41 /* Tokens with SPELL_STRING store their spelling in the token list,
  42    and it's length in the token->val.name.len.  */
  43 enum spell_type
  44 {
  45   SPELL_OPERATOR = 0,
  46   SPELL_CHAR,
  47   SPELL_IDENT,
  48   SPELL_NUMBER,
  49   SPELL_STRING,
  50   SPELL_NONE
  51 };
  52
  53 struct token_spelling
  54 {
  55   enum spell_type category;
  56   const unsigned char *name;
  57 };
  58
  59 static const unsigned char *const digraph_spellings[] =
  60 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  61
  62 #define OP(e, s) { SPELL_OPERATOR, U s           },
  63 #define TK(e, s) { s,              U STRINGX (e) },
  64 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  65 #undef OP
  66 #undef TK
  67
  68 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  69 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  70 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  71
  72 static void handle_newline PARAMS ((cpp_reader *));
  73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  74 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  75
  76 static int skip_asm_block PARAMS ((cpp_reader *));
  77 static int skip_block_comment PARAMS ((cpp_reader *));
  78 static int skip_line_comment PARAMS ((cpp_reader *));
  79 static void adjust_column PARAMS ((cpp_reader *));
  80 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  81 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  82 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  83                                                     const U_CHAR *));
  84 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  85 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  86 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  87 static void unterminated PARAMS ((cpp_reader *, int));
  88 static bool trigraph_p PARAMS ((cpp_reader *));
  89 static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
  90 static void save_asm PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  91 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  92 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  93 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  94                                    const unsigned char *, unsigned int *));
  95 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  96
  97 static unsigned int hex_digit_value PARAMS ((unsigned int));
  98 static _cpp_buff *new_buff PARAMS ((size_t));
  99
 100 /* Utility routine:
 101
 102    Compares, the token TOKEN to the NUL-terminated string STRING.
 103    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 104 int
 105 cpp_ideq (token, string)
 106      const cpp_token *token;
 107      const char *string;
 108 {
 109   if (token->type != CPP_NAME)
 110     return 0;
 111
 112   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 113 }
 114
 115 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 116    Returns with buffer->cur pointing to the character immediately
 117    following the newline (combination).  */
 118 static void
 119 handle_newline (pfile)
 120      cpp_reader *pfile;
 121 {
 122   cpp_buffer *buffer = pfile->buffer;
 123
 124   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 125      only accept CR-LF; maybe we should fall back to that behaviour?  */
 126   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 127     buffer->cur++;
 128
 129   buffer->line_base = buffer->cur;
 130   buffer->col_adjust = 0;
 131   pfile->line++;
 132 }
 133
 134 /* Subroutine of skip_escaped_newlines; called when a 3-character
 135    sequence beginning with "??" is encountered.  buffer->cur points to
 136    the second '?'.
 137
 138    Warn if necessary, and returns true if the sequence forms a
 139    trigraph and the trigraph should be honoured.  */
 140 static bool
 141 trigraph_p (pfile)
 142      cpp_reader *pfile;
 143 {
 144   cpp_buffer *buffer = pfile->buffer;
 145   cppchar_t from_char = buffer->cur[1];
 146   bool accept;
 147
 148   if (!_cpp_trigraph_map[from_char])
 149     return false;
 150
 151   accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       if (accept)
 157         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
 158                                "trigraph ??%c converted to %c",
 159                                (int) from_char,
 160                                (int) _cpp_trigraph_map[from_char]);
 161       else if (buffer->cur != buffer->last_Wtrigraphs)
 162         {
 163           buffer->last_Wtrigraphs = buffer->cur;
 164           cpp_warning_with_line (pfile, pfile->line,
 165                                  CPP_BUF_COL (buffer) - 1,
 166                                  "trigraph ??%c ignored", (int) from_char);
 167         }
 168     }
 169
 170   return accept;
 171 }
 172
 173 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 174    lie in buffer->cur[-1].  Returns the next byte, which will be in
 175    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 176    2 of the ISO C standard.  */
 177 static cppchar_t
 178 skip_escaped_newlines (pfile)
 179      cpp_reader *pfile;
 180 {
 181   cpp_buffer *buffer = pfile->buffer;
 182   cppchar_t next = buffer->cur[-1];
 183
 184   /* Only do this if we apply stages 1 and 2.  */
 185   if (!buffer->from_stage3)
 186     {
 187       const unsigned char *saved_cur;
 188       cppchar_t next1;
 189
 190       do
 191         {
 192           if (next == '?')
 193             {
 194               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 195                 break;
 196
 197               /* Translate the trigraph.  */
 198               next = _cpp_trigraph_map[buffer->cur[1]];
 199               buffer->cur += 2;
 200               if (next != '\\')
 201                 break;
 202             }
 203
 204           if (buffer->cur == buffer->rlimit)
 205             break;
 206
 207           /* We have a backslash, and room for at least one more
 208              character.  Skip horizontal whitespace.  */
 209           saved_cur = buffer->cur;
 210           do
 211             next1 = *buffer->cur++;
 212           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 213
 214           if (!is_vspace (next1))
 215             {
 216               buffer->cur = saved_cur;
 217               break;
 218             }
 219
 220           if (saved_cur != buffer->cur - 1
 221               && !pfile->state.lexing_comment)
 222             cpp_warning (pfile, "backslash and newline separated by space");
 223
 224           handle_newline (pfile);
 225           buffer->backup_to = buffer->cur;
 226           if (buffer->cur == buffer->rlimit)
 227             {
 228               cpp_pedwarn (pfile, "backslash-newline at end of file");
 229               next = EOF;
 230             }
 231           else
 232             next = *buffer->cur++;
 233         }
 234       while (next == '\\' || next == '?');
 235     }
 236
 237   return next;
 238 }
 239
 240 /* Obtain the next character, after trigraph conversion and skipping
 241    an arbitrarily long string of escaped newlines.  The common case of
 242    no trigraphs or escaped newlines falls through quickly.  On return,
 243    buffer->backup_to points to where to return to if the character is
 244    not to be processed.  */
 245 static cppchar_t
 246 get_effective_char (pfile)
 247      cpp_reader *pfile;
 248 {
 249   cppchar_t next;
 250   cpp_buffer *buffer = pfile->buffer;
 251
 252   buffer->backup_to = buffer->cur;
 253   next = *buffer->cur++;
 254   if (__builtin_expect (next == '?' || next == '\\', 0))
 255     next = skip_escaped_newlines (pfile);
 256
 257    return next;
 258 }
 259
 260 /* SDCC _asm specific */
 261 /* Skip an _asm ... _endasm block.  We find the end of the comment by
 262    seeing _endasm.  Returns non-zero if _asm terminated by EOF, zero
 263    otherwise.  */
 264 static int
 265 skip_asm_block (pfile)
 266      cpp_reader *pfile;
 267 {
 268 #define _ENDASM_STR "endasm"
 269 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
 270
 271   cpp_buffer *buffer = pfile->buffer;
 272   cppchar_t c = EOF;
 273   int prev_space = 0;
 274   int ret = 1;
 275
 276   pfile->state.lexing_comment = 1;
 277   while (buffer->cur != buffer->rlimit)
 278     {
 279       prev_space = is_space(c);
 280       c = *buffer->cur++;
 281
 282     next_char:
 283       /* FIXME: For speed, create a new character class of characters
 284          of interest inside block comments.  */
 285       if (c == '?' || c == '\\')
 286         c = skip_escaped_newlines (pfile);
 287
 288       if (prev_space && c == '_')
 289         {
 290           if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
 291             strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
 292             {
 293               buffer->cur += _ENDASM_LEN;
 294               ret = 0;
 295               break;
 296             }
 297         }
 298       else if (is_vspace (c))
 299         {
 300           prev_space = is_space(c);
 301           handle_newline (pfile);
 302           goto next_char;
 303         }
 304       else if (c == '\t')
 305         adjust_column (pfile);
 306     }
 307
 308   pfile->state.lexing_comment = 0;
 309   return ret;
 310 }
 311
 312 /* Skip a C-style block comment.  We find the end of the comment by
 313    seeing if an asterisk is before every '/' we encounter.  Returns
 314    non-zero if comment terminated by EOF, zero otherwise.  */
 315 static int
 316 skip_block_comment (pfile)
 317      cpp_reader *pfile;
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   cppchar_t c = EOF, prevc = EOF;
 321
 322   pfile->state.lexing_comment = 1;
 323   while (buffer->cur != buffer->rlimit)
 324     {
 325       prevc = c, c = *buffer->cur++;
 326
 327       /* FIXME: For speed, create a new character class of characters
 328          of interest inside block comments.  */
 329       if (c == '?' || c == '\\')
 330         c = skip_escaped_newlines (pfile);
 331
 332       /* People like decorating comments with '*', so check for '/'
 333          instead for efficiency.  */
 334       if (c == '/')
 335         {
 336           if (prevc == '*')
 337             break;
 338
 339           /* Warn about potential nested comments, but not if the '/'
 340              comes immediately before the true comment delimiter.
 341              Don't bother to get it right across escaped newlines.  */
 342           if (CPP_OPTION (pfile, warn_comments)
 343               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 344             cpp_warning_with_line (pfile,
 345                                    pfile->line, CPP_BUF_COL (buffer),
 346                                    "\"/*\" within comment");
 347         }
 348       else if (is_vspace (c))
 349         handle_newline (pfile);
 350       else if (c == '\t')
 351         adjust_column (pfile);
 352     }
 353
 354   pfile->state.lexing_comment = 0;
 355   return c != '/' || prevc != '*';
 356 }
 357
 358 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 359    terminating newline.  Handles escaped newlines.  Returns non-zero
 360    if a multiline comment.  */
 361 static int
 362 skip_line_comment (pfile)
 363      cpp_reader *pfile;
 364 {
 365   cpp_buffer *buffer = pfile->buffer;
 366   unsigned int orig_line = pfile->line;
 367   cppchar_t c;
 368
 369   pfile->state.lexing_comment = 1;
 370   do
 371     {
 372       if (buffer->cur == buffer->rlimit)
 373         goto at_eof;
 374
 375       c = *buffer->cur++;
 376       if (c == '?' || c == '\\')
 377         c = skip_escaped_newlines (pfile);
 378     }
 379   while (!is_vspace (c));
 380
 381   /* Step back over the newline, except at EOF.  */
 382   buffer->cur--;
 383  at_eof:
 384
 385   pfile->state.lexing_comment = 0;
 386   return orig_line != pfile->line;
 387 }
 388
 389 /* pfile->buffer->cur is one beyond the \t character.  Update
 390    col_adjust so we track the column correctly.  */
 391 static void
 392 adjust_column (pfile)
 393      cpp_reader *pfile;
 394 {
 395   cpp_buffer *buffer = pfile->buffer;
 396   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 397
 398   /* Round it up to multiple of the tabstop, but subtract 1 since the
 399      tab itself occupies a character position.  */
 400   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 401                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 402 }
 403
 404 /* Skips whitespace, saving the next non-whitespace character.
 405    Adjusts pfile->col_adjust to account for tabs.  Without this,
 406    tokens might be assigned an incorrect column.  */
 407 static int
 408 skip_whitespace (pfile, c)
 409      cpp_reader *pfile;
 410      cppchar_t c;
 411 {
 412   cpp_buffer *buffer = pfile->buffer;
 413   unsigned int warned = 0;
 414
 415   do
 416     {
 417       /* Horizontal space always OK.  */
 418       if (c == ' ')
 419         ;
 420       else if (c == '\t')
 421         adjust_column (pfile);
 422       /* Just \f \v or \0 left.  */
 423       else if (c == '\0')
 424         {
 425           if (buffer->cur - 1 == buffer->rlimit)
 426             return 0;
 427           if (!warned)
 428             {
 429               cpp_warning (pfile, "null character(s) ignored");
 430               warned = 1;
 431             }
 432         }
 433       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 434         cpp_pedwarn_with_line (pfile, pfile->line,
 435                                CPP_BUF_COL (buffer),
 436                                "%s in preprocessing directive",
 437                                c == '\f' ? "form feed" : "vertical tab");
 438
 439       c = *buffer->cur++;
 440     }
 441   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 442   while (is_nvspace (c));
 443
 444   buffer->cur--;
 445   return 1;
 446 }
 447
 448 /* See if the characters of a number token are valid in a name (no
 449    '.', '+' or '-').  */
 450 static int
 451 name_p (pfile, string)
 452      cpp_reader *pfile;
 453      const cpp_string *string;
 454 {
 455   unsigned int i;
 456
 457   for (i = 0; i < string->len; i++)
 458     if (!is_idchar (string->text[i]))
 459       return 0;
 460
 461   return 1;
 462 }
 463
 464 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 465    a critical inner loop.  The common case is an identifier which has
 466    not been split by backslash-newline, does not contain a dollar
 467    sign, and has already been scanned (roughly 10:1 ratio of
 468    seen:unseen identifiers in normal code; the distribution is
 469    Poisson-like).  Second most common case is a new identifier, not
 470    split and no dollar sign.  The other possibilities are rare and
 471    have been relegated to parse_identifier_slow.  */
 472 static cpp_hashnode *
 473 parse_identifier (pfile)
 474      cpp_reader *pfile;
 475 {
 476   cpp_hashnode *result;
 477   const U_CHAR *cur;
 478
 479   /* Fast-path loop.  Skim over a normal identifier.
 480      N.B. ISIDNUM does not include $.  */
 481   cur = pfile->buffer->cur;
 482   while (ISIDNUM (*cur))
 483     cur++;
 484
 485   /* Check for slow-path cases.  */
 486   if (*cur == '?' || *cur == '\\' || *cur == '$')
 487     result = parse_identifier_slow (pfile, cur);
 488   else
 489     {
 490       const U_CHAR *base = pfile->buffer->cur - 1;
 491       result = (cpp_hashnode *)
 492         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 493       pfile->buffer->cur = cur;
 494     }
 495
 496   /* Rarely, identifiers require diagnostics when lexed.
 497      XXX Has to be forced out of the fast path.  */
 498   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 499                         && !pfile->state.skipping, 0))
 500     {
 501       /* It is allowed to poison the same identifier twice.  */
 502       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 503         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 504                    NODE_NAME (result));
 505
 506       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 507          replacement list of a variadic macro.  */
 508       if (result == pfile->spec_nodes.n__VA_ARGS__
 509           && !pfile->state.va_args_ok)
 510         cpp_pedwarn (pfile,
 511         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 512     }
 513
 514   return result;
 515 }
 516
 517 /* Slow path.  This handles identifiers which have been split, and
 518    identifiers which contain dollar signs.  The part of the identifier
 519    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 520 static cpp_hashnode *
 521 parse_identifier_slow (pfile, cur)
 522      cpp_reader *pfile;
 523      const U_CHAR *cur;
 524 {
 525   cpp_buffer *buffer = pfile->buffer;
 526   const U_CHAR *base = buffer->cur - 1;
 527   struct obstack *stack = &pfile->hash_table->stack;
 528   unsigned int c, saw_dollar = 0, len;
 529
 530   /* Copy the part of the token which is known to be okay.  */
 531   obstack_grow (stack, base, cur - base);
 532
 533   /* Now process the part which isn't.  We are looking at one of
 534      '$', '\\', or '?' on entry to this loop.  */
 535   c = *cur++;
 536   buffer->cur = cur;
 537   do
 538     {
 539       while (is_idchar (c))
 540         {
 541           obstack_1grow (stack, c);
 542
 543           if (c == '$')
 544             saw_dollar++;
 545
 546           c = *buffer->cur++;
 547         }
 548
 549       /* Potential escaped newline?  */
 550       buffer->backup_to = buffer->cur - 1;
 551       if (c != '?' && c != '\\')
 552         break;
 553       c = skip_escaped_newlines (pfile);
 554     }
 555   while (is_idchar (c));
 556
 557   /* Step back over the unwanted char.  */
 558   BACKUP ();
 559
 560   /* $ is not an identifier character in the standard, but is commonly
 561      accepted as an extension.  Don't warn about it in skipped
 562      conditional blocks.  */
 563   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 564     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 565
 566   /* Identifiers are null-terminated.  */
 567   len = obstack_object_size (stack);
 568   obstack_1grow (stack, '\0');
 569
 570   return (cpp_hashnode *)
 571     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 572 }
 573
 574 /* Parse a number, beginning with character C, skipping embedded
 575    backslash-newlines.  LEADING_PERIOD is non-zero if there was a "."
 576    before C.  Place the result in NUMBER.  */
 577 static void
 578 parse_number (pfile, number, c, leading_period)
 579      cpp_reader *pfile;
 580      cpp_string *number;
 581      cppchar_t c;
 582      int leading_period;
 583 {
 584   cpp_buffer *buffer = pfile->buffer;
 585   unsigned char *dest, *limit;
 586
 587   dest = BUFF_FRONT (pfile->u_buff);
 588   limit = BUFF_LIMIT (pfile->u_buff);
 589
 590   /* Place a leading period.  */
 591   if (leading_period)
 592     {
 593       if (dest == limit)
 594         {
 595           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 596           dest = BUFF_FRONT (pfile->u_buff);
 597           limit = BUFF_LIMIT (pfile->u_buff);
 598         }
 599       *dest++ = '.';
 600     }
 601
 602   do
 603     {
 604       do
 605         {
 606           /* Need room for terminating null.  */
 607           if ((size_t) (limit - dest) < 2)
 608             {
 609               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 610               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 611               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 612               limit = BUFF_LIMIT (pfile->u_buff);
 613             }
 614           *dest++ = c;
 615
 616           c = *buffer->cur++;
 617         }
 618       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 619
 620       /* Potential escaped newline?  */
 621       buffer->backup_to = buffer->cur - 1;
 622       if (c != '?' && c != '\\')
 623         break;
 624       c = skip_escaped_newlines (pfile);
 625     }
 626   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 627
 628   /* Step back over the unwanted char.  */
 629   BACKUP ();
 630
 631   /* Null-terminate the number.  */
 632   *dest = '\0';
 633
 634   number->text = BUFF_FRONT (pfile->u_buff);
 635   number->len = dest - number->text;
 636   BUFF_FRONT (pfile->u_buff) = dest + 1;
 637 }
 638
 639 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 640 static void
 641 unterminated (pfile, term)
 642      cpp_reader *pfile;
 643      int term;
 644 {
 645   cpp_error (pfile, "missing terminating %c character", term);
 646
 647   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 648     {
 649       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 650                            "possible start of unterminated string literal");
 651       pfile->mls_line = 0;
 652     }
 653 }
 654
 655 /* Subroutine of parse_string.  */
 656 static int
 657 unescaped_terminator_p (pfile, dest)
 658      cpp_reader *pfile;
 659      const unsigned char *dest;
 660 {
 661   const unsigned char *start, *temp;
 662
 663   /* In #include-style directives, terminators are not escapeable.  */
 664   if (pfile->state.angled_headers)
 665     return 1;
 666
 667   start = BUFF_FRONT (pfile->u_buff);
 668
 669   /* An odd number of consecutive backslashes represents an escaped
 670      terminator.  */
 671   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 672     ;
 673
 674   return ((dest - temp) & 1) == 0;
 675 }
 676
 677 /* Parses a string, character constant, or angle-bracketed header file
 678    name.  Handles embedded trigraphs and escaped newlines.  The stored
 679    string is guaranteed NUL-terminated, but it is not guaranteed that
 680    this is the first NUL since embedded NULs are preserved.
 681    Multi-line strings are allowed, but they are deprecated.
 682
 683    When this function returns, buffer->cur points to the next
 684    character to be processed.  */
 685 static void
 686 parse_string (pfile, token, terminator)
 687      cpp_reader *pfile;
 688      cpp_token *token;
 689      cppchar_t terminator;
 690 {
 691   cpp_buffer *buffer = pfile->buffer;
 692   unsigned char *dest, *limit;
 693   cppchar_t c;
 694   bool warned_nulls = false, warned_multi = false;
 695
 696   dest = BUFF_FRONT (pfile->u_buff);
 697   limit = BUFF_LIMIT (pfile->u_buff);
 698
 699   for (;;)
 700     {
 701       /* We need room for another char, possibly the terminating NUL.  */
 702       if ((size_t) (limit - dest) < 1)
 703         {
 704           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 705           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 706           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 707           limit = BUFF_LIMIT (pfile->u_buff);
 708         }
 709
 710       /* Handle trigraphs, escaped newlines etc.  */
 711       c = *buffer->cur++;
 712       if (c == '?' || c == '\\')
 713         c = skip_escaped_newlines (pfile);
 714
 715       if (c == terminator)
 716         {
 717           if (unescaped_terminator_p (pfile, dest))
 718             break;
 719         }
 720       else if (is_vspace (c))
 721         {
 722           /* In assembly language, silently terminate string and
 723              character literals at end of line.  This is a kludge
 724              around not knowing where comments are.  */
 725           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 726             {
 727               buffer->cur--;
 728               break;
 729             }
 730
 731           /* Character constants and header names may not extend over
 732              multiple lines.  In Standard C, neither may strings.
 733              Unfortunately, we accept multiline strings as an
 734              extension, except in #include family directives.  */
 735           if (terminator != '"' || pfile->state.angled_headers)
 736             {
 737               unterminated (pfile, terminator);
 738               buffer->cur--;
 739               break;
 740             }
 741
 742           if (!warned_multi)
 743             {
 744               warned_multi = true;
 745               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 746             }
 747
 748           if (pfile->mls_line == 0)
 749             {
 750               pfile->mls_line = token->line;
 751               pfile->mls_col = token->col;
 752             }
 753
 754           handle_newline (pfile);
 755           c = '\n';
 756         }
 757       else if (c == '\0')
 758         {
 759           if (buffer->cur - 1 == buffer->rlimit)
 760             {
 761               unterminated (pfile, terminator);
 762               buffer->cur--;
 763               break;
 764             }
 765           if (!warned_nulls)
 766             {
 767               warned_nulls = true;
 768               cpp_warning (pfile, "null character(s) preserved in literal");
 769             }
 770         }
 771
 772       *dest++ = c;
 773     }
 774
 775   *dest = '\0';
 776
 777   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 778   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 779   BUFF_FRONT (pfile->u_buff) = dest + 1;
 780 }
 781
 782 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
 783    comment blocks (when executed with -C option) and
 784    _asm (SDCPP specific) blocks */
 785
 786 /* Count and copy characters from src to dest, excluding CRs:
 787    CRs are automatically generated, because the output is
 788    opened in TEXT mode. If dest == NULL, only count chars */
 789 static unsigned int
 790 copy_text_chars (dest, src, len)
 791      char *dest;
 792      const char *src;
 793      unsigned int len;
 794 {
 795   unsigned int n = 0;
 796   const char *p;
 797
 798   for (p = src; p != src + len; ++p)
 799     {
 800       assert(*p != '\0');
 801
 802       if (*p != '\r')
 803         {
 804           if (dest != NULL)
 805             *dest++ = *p;
 806           ++n;
 807         }
 808     }
 809
 810     return n;
 811 }
 812
 813 /* SDCC _asm specific */
 814 /* The stored comment includes the comment start and any terminator.  */
 815 static void
 816 save_asm (pfile, token, from)
 817      cpp_reader *pfile;
 818      cpp_token *token;
 819      const unsigned char *from;
 820 {
 821 #define _ASM_STR  "_asm"
 822 #define _ASM_LEN  ((sizeof _ASM_STR) - 1)
 823
 824   unsigned char *buffer;
 825   unsigned int text_len, len;
 826
 827   len = pfile->buffer->cur - from;
 828   /* + _ASM_LEN for the initial '_asm'.  */
 829   text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
 830   buffer = _cpp_unaligned_alloc (pfile, text_len);
 831
 832
 833   token->type = CPP_ASM;
 834   token->val.str.len = text_len;
 835   token->val.str.text = buffer;
 836
 837   memcpy (buffer, _ASM_STR, _ASM_LEN);
 838   copy_text_chars (buffer + _ASM_LEN, from, len);
 839 }
 840
 841 /* The stored comment includes the comment start and any terminator.  */
 842 static void
 843 save_comment (pfile, token, from)
 844      cpp_reader *pfile;
 845      cpp_token *token;
 846      const unsigned char *from;
 847 {
 848   unsigned char *buffer;
 849   unsigned int len;
 850
 851   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 852
 853   /* C++ comments probably (not definitely) have moved past a new
 854      line, which we don't want to save in the comment.  */
 855   if (is_vspace (pfile->buffer->cur[-1]))
 856     len--;
 857   buffer = _cpp_unaligned_alloc (pfile, len);
 858
 859   token->type = CPP_COMMENT;
 860   token->val.str.len = len;
 861   token->val.str.text = buffer;
 862
 863   buffer[0] = '/';
 864   copy_text_chars (buffer + 1, from, len);
 865 }
 866
 867 /* Allocate COUNT tokens for RUN.  */
 868 void
 869 _cpp_init_tokenrun (run, count)
 870      tokenrun *run;
 871      unsigned int count;
 872 {
 873   run->base = xnewvec (cpp_token, count);
 874   run->limit = run->base + count;
 875   run->next = NULL;
 876 }
 877
 878 /* Returns the next tokenrun, or creates one if there is none.  */
 879 static tokenrun *
 880 next_tokenrun (run)
 881      tokenrun *run;
 882 {
 883   if (run->next == NULL)
 884     {
 885       run->next = xnew (tokenrun);
 886       run->next->prev = run;
 887       _cpp_init_tokenrun (run->next, 250);
 888     }
 889
 890   return run->next;
 891 }
 892
 893 /* Allocate a single token that is invalidated at the same time as the
 894    rest of the tokens on the line.  Has its line and col set to the
 895    same as the last lexed token, so that diagnostics appear in the
 896    right place.  */
 897 cpp_token *
 898 _cpp_temp_token (pfile)
 899      cpp_reader *pfile;
 900 {
 901   cpp_token *old, *result;
 902
 903   old = pfile->cur_token - 1;
 904   if (pfile->cur_token == pfile->cur_run->limit)
 905     {
 906       pfile->cur_run = next_tokenrun (pfile->cur_run);
 907       pfile->cur_token = pfile->cur_run->base;
 908     }
 909
 910   result = pfile->cur_token++;
 911   result->line = old->line;
 912   result->col = old->col;
 913   return result;
 914 }
 915
 916 /* Lex a token into RESULT (external interface).  Takes care of issues
 917    like directive handling, token lookahead, multiple include
 918    optimization and skipping.  */
 919 const cpp_token *
 920 _cpp_lex_token (pfile)
 921      cpp_reader *pfile;
 922 {
 923   cpp_token *result;
 924
 925   for (;;)
 926     {
 927       if (pfile->cur_token == pfile->cur_run->limit)
 928         {
 929           pfile->cur_run = next_tokenrun (pfile->cur_run);
 930           pfile->cur_token = pfile->cur_run->base;
 931         }
 932
 933       if (pfile->lookaheads)
 934         {
 935           pfile->lookaheads--;
 936           result = pfile->cur_token++;
 937         }
 938       else
 939         result = _cpp_lex_direct (pfile);
 940
 941       if (result->flags & BOL)
 942         {
 943           /* Is this a directive.  If _cpp_handle_directive returns
 944              false, it is an assembler #.  */
 945           if (result->type == CPP_HASH
 946               && !pfile->state.parsing_args
 947               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 948             continue;
 949           if (pfile->cb.line_change && !pfile->state.skipping)
 950             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 951         }
 952
 953       /* We don't skip tokens in directives.  */
 954       if (pfile->state.in_directive)
 955         break;
 956
 957       /* Outside a directive, invalidate controlling macros.  At file
 958          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 959          get here and MI optimisation works.  */
 960       pfile->mi_valid = false;
 961
 962       if (!pfile->state.skipping || result->type == CPP_EOF)
 963         break;
 964     }
 965
 966   return result;
 967 }
 968
 969 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 970   do {                                          \
 971     if (get_effective_char (pfile) == CHAR)     \
 972       result->type = THEN_TYPE;                 \
 973     else                                        \
 974       {                                         \
 975         BACKUP ();                              \
 976         result->type = ELSE_TYPE;               \
 977       }                                         \
 978   } while (0)
 979
 980 /* Lex a token into pfile->cur_token, which is also incremented, to
 981    get diagnostics pointing to the correct location.
 982
 983    Does not handle issues such as token lookahead, multiple-include
 984    optimisation, directives, skipping etc.  This function is only
 985    suitable for use by _cpp_lex_token, and in special cases like
 986    lex_expansion_token which doesn't care for any of these issues.
 987
 988    When meeting a newline, returns CPP_EOF if parsing a directive,
 989    otherwise returns to the start of the token buffer if permissible.
 990    Returns the location of the lexed token.

        The body is a single dispatch switch on the first character,
        re-entered through four labels:
          fresh_line          reload the current buffer and move its
                              saved flags into the token;
          update_tokens_line  refresh the token's line number;
          skipped_white       fetch the next character and record the
                              token's column;
          trigraph            dispatch again on a character already
                              held in C.  */
 991 cpp_token *
 992 _cpp_lex_direct (pfile)
 993      cpp_reader *pfile;
 994 {
 995   cppchar_t c;
 996   cpp_buffer *buffer;
 997   const unsigned char *comment_start;
       /* Claim the current token slot; cur_token now points past it.  */
 998   cpp_token *result = pfile->cur_token++;
 999
1000  fresh_line:
1001   buffer = pfile->buffer;
1002   result->flags = buffer->saved_flags;
1003   buffer->saved_flags = 0;
1004  update_tokens_line:
1005   result->line = pfile->line;
1006
1007  skipped_white:
1008   c = *buffer->cur++;
1009   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1010
1011  trigraph:
1012   switch (c)
1013     {
1014     case ' ': case '\t': case '\f': case '\v': case '\0':
1015       result->flags |= PREV_WHITE;
           /* A nonzero return resumes lexing after the whitespace; a
              zero return is handled as end of buffer below.  */
1016       if (skip_whitespace (pfile, c))
1017         goto skipped_white;
1018
1019       /* EOF.  */
           /* Step back onto the terminator and flag the next token
              lexed from this buffer as BOL.  */
1020       buffer->cur--;
1021       buffer->saved_flags = BOL;
1022       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1023         {
1024           if (buffer->cur != buffer->line_base)
1025             {
1026               /* Non-empty files should end in a newline.  Don't warn
1027                  for command line and _Pragma buffers.  */
1028               if (!buffer->from_stage3)
1029                 cpp_pedwarn (pfile, "no newline at end of file");
1030               handle_newline (pfile);
1031             }
1032
1033           /* Don't pop the last buffer.  */
1034           if (buffer->prev)
1035             {
                   /* Read the flag before the buffer is popped.  */
1036               unsigned char stop = buffer->return_at_eof;
1037
1038               _cpp_pop_buffer (pfile);
1039               if (!stop)
1040                 goto fresh_line;
1041             }
1042         }
1043       result->type = CPP_EOF;
1044       break;
1045
1046     case '\n': case '\r':
1047       handle_newline (pfile);
1048       buffer->saved_flags = BOL;
1049       if (! pfile->state.in_directive)
1050         {
1051           if (pfile->state.parsing_args == 2)
1052             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens must be kept, rewind to the first slot
                  of the base run so this line's tokens reuse it.  */
1053           if (!pfile->keep_tokens)
1054             {
1055               pfile->cur_run = &pfile->base_run;
1056               result = pfile->base_run.base;
1057               pfile->cur_token = result + 1;
1058             }
1059           goto fresh_line;
1060         }
           /* Inside a directive the newline ends the token stream.  */
1061       result->type = CPP_EOF;
1062       break;
1063
1064     case '?':
1065     case '\\':
1066       /* These could start an escaped newline, or '?' a trigraph.  Let
1067          skip_escaped_newlines do all the work.  */
1068       {
1069         unsigned int line = pfile->line;
1070
1071         c = skip_escaped_newlines (pfile);
1072         if (line != pfile->line)
1073           {
1074             buffer->cur--;
1075             /* We had at least one escaped newline of some sort.
1076                Update the token's line and column.  */
1077             goto update_tokens_line;
1078           }
1079       }
1080
1081       /* We are either the original '?' or '\\', or a trigraph.  */
1082       if (c == '?')
1083         result->type = CPP_QUERY;
1084       else if (c == '\\')
1085         goto random_char;
1086       else
1087         goto trigraph;
1088       break;
1089
1090     case '0': case '1': case '2': case '3': case '4':
1091     case '5': case '6': case '7': case '8': case '9':
1092       result->type = CPP_NUMBER;
1093       parse_number (pfile, &result->val.str, c, 0);
1094       break;
1095
1096     case 'L':
1097       /* 'L' may introduce wide characters or strings.  */
1098         {
1099           const unsigned char *pos = buffer->cur;
1100
1101           c = get_effective_char (pfile);
1102           if (c == '\'' || c == '"')
1103             {
1104               result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1105               parse_string (pfile, result, c);
1106               break;
1107             }
               /* Not a wide literal: rewind and lex the 'L' as the
                  start of an identifier.  */
1108           buffer->cur = pos;
1109         }
1110         /* Fall through.  */
1111
1112     start_ident:
1113     case '_':
1114     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1115     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1116     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1117     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1118     case 'y': case 'z':
1119     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1120     case 'G': case 'H': case 'I': case 'J': case 'K':
1121     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1122     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1123     case 'Y': case 'Z':
1124       result->type = CPP_NAME;
1125       result->val.node = parse_identifier (pfile);
1126
1127       /* SDCC _asm specific */
1128       /* handle _asm ... _endasm ;  */
           /* Applies only when the preproc_asm option is zero and the
              identifier is the n__asm special node.  */
1129       if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1130         {
1131           comment_start = buffer->cur;
1132           result->type = CPP_ASM;
1133           skip_asm_block (pfile);
1134           /* Save the _asm block as a token in its own right.  */
1135           save_asm (pfile, result, comment_start);
1136         }
1137       /* Convert named operators to their proper types.  */
1138       else if (result->val.node->flags & NODE_OPERATOR)
1139         {
1140           result->flags |= NAMED_OP;
1141           result->type = result->val.node->value.operator;
1142         }
1143       break;
1144
1145     case '\'':
1146     case '"':
1147       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1148       parse_string (pfile, result, c);
1149       break;
1150
1151     case '/':
1152       /* A potential block or line comment.  */
1153       comment_start = buffer->cur;
1154       c = get_effective_char (pfile);
1155
1156       if (c == '*')
1157         {
1158           if (skip_block_comment (pfile))
1159             cpp_error (pfile, "unterminated comment");
1160         }
1161       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1162                             || CPP_IN_SYSTEM_HEADER (pfile)))
1163         {
1164           /* Warn about comments only if pedantically GNUC89, and not
1165              in system headers.  */
1166           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1167               && ! buffer->warned_cplusplus_comments)
1168             {
1169               cpp_pedwarn (pfile,
1170                            "C++ style comments are not allowed in ISO C89");
1171               cpp_pedwarn (pfile,
1172                            "(this will be reported only once per input file)");
1173               buffer->warned_cplusplus_comments = 1;
1174             }
1175
1176           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1177             cpp_warning (pfile, "multi-line comment");
1178         }
1179       else if (c == '=')
1180         {
1181           result->type = CPP_DIV_EQ;
1182           break;
1183         }
1184       else
1185         {
1186           BACKUP ();
1187           result->type = CPP_DIV;
1188           break;
1189         }
1190
           /* Only the two comment branches reach this point.  When
              comments are not saved, the comment counts as whitespace
              and the following token is lexed into the same slot.  */
1191       if (!pfile->state.save_comments)
1192         {
1193           result->flags |= PREV_WHITE;
1194           goto update_tokens_line;
1195         }
1196
1197       /* Save the comment as a token in its own right.  */
1198       save_comment (pfile, result, comment_start);
1199       break;
1200
1201     case '<':
           /* With angled_headers set, '<' opens a header name that
              runs up to the closing '>'.  */
1202       if (pfile->state.angled_headers)
1203         {
1204           result->type = CPP_HEADER_NAME;
1205           parse_string (pfile, result, '>');
1206           break;
1207         }
1208
1209       c = get_effective_char (pfile);
1210       if (c == '=')
1211         result->type = CPP_LESS_EQ;
1212       else if (c == '<')
1213         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1214       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1215         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1216       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1217         {
1218           result->type = CPP_OPEN_SQUARE;
1219           result->flags |= DIGRAPH;
1220         }
1221       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1222         {
1223           result->type = CPP_OPEN_BRACE;
1224           result->flags |= DIGRAPH;
1225         }
1226       else
1227         {
1228           BACKUP ();
1229           result->type = CPP_LESS;
1230         }
1231       break;
1232
1233     case '>':
1234       c = get_effective_char (pfile);
1235       if (c == '=')
1236         result->type = CPP_GREATER_EQ;
1237       else if (c == '>')
1238         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1239       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1240         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1241       else
1242         {
1243           BACKUP ();
1244           result->type = CPP_GREATER;
1245         }
1246       break;
1247
1248     case '%':
1249       c = get_effective_char (pfile);
1250       if (c == '=')
1251         result->type = CPP_MOD_EQ;
1252       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1253         {
               /* "%:" is the digraph for '#'; "%:%:" is the digraph
                  for '##'.  */
1254           result->flags |= DIGRAPH;
1255           result->type = CPP_HASH;
1256           if (get_effective_char (pfile) == '%')
1257             {
1258               const unsigned char *pos = buffer->cur;
1259
1260               if (get_effective_char (pfile) == ':')
1261                 result->type = CPP_PASTE;
1262               else
                     /* Only "%:" matched: rewind to just before the
                        second '%'.  */
1263                 buffer->cur = pos - 1;
1264             }
1265           else
1266             BACKUP ();
1267         }
1268       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1269         {
1270           result->flags |= DIGRAPH;
1271           result->type = CPP_CLOSE_BRACE;
1272         }
1273       else
1274         {
1275           BACKUP ();
1276           result->type = CPP_MOD;
1277         }
1278       break;
1279
1280     case '.':
1281       result->type = CPP_DOT;
1282       c = get_effective_char (pfile);
1283       if (c == '.')
1284         {
1285           const unsigned char *pos = buffer->cur;
1286
1287           if (get_effective_char (pfile) == '.')
1288             result->type = CPP_ELLIPSIS;
1289           else
                 /* Just "..": rewind to just before the second '.' so
                    that only a single CPP_DOT is produced.  */
1290             buffer->cur = pos - 1;
1291         }
1292       /* All known character sets have 0...9 contiguous.  */
1293       else if (ISDIGIT (c))
1294         {
1295           result->type = CPP_NUMBER;
1296           parse_number (pfile, &result->val.str, c, 1);
1297         }
1298       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1299         result->type = CPP_DOT_STAR;
1300       else
1301         BACKUP ();
1302       break;
1303
1304     case '+':
1305       c = get_effective_char (pfile);
1306       if (c == '+')
1307         result->type = CPP_PLUS_PLUS;
1308       else if (c == '=')
1309         result->type = CPP_PLUS_EQ;
1310       else
1311         {
1312           BACKUP ();
1313           result->type = CPP_PLUS;
1314         }
1315       break;
1316
1317     case '-':
1318       c = get_effective_char (pfile);
1319       if (c == '>')
1320         {
1321           result->type = CPP_DEREF;
1322           if (CPP_OPTION (pfile, cplusplus))
1323             {
1324               if (get_effective_char (pfile) == '*')
1325                 result->type = CPP_DEREF_STAR;
1326               else
1327                 BACKUP ();
1328             }
1329         }
1330       else if (c == '-')
1331         result->type = CPP_MINUS_MINUS;
1332       else if (c == '=')
1333         result->type = CPP_MINUS_EQ;
1334       else
1335         {
1336           BACKUP ();
1337           result->type = CPP_MINUS;
1338         }
1339       break;
1340
1341     case '&':
1342       c = get_effective_char (pfile);
1343       if (c == '&')
1344         result->type = CPP_AND_AND;
1345       else if (c == '=')
1346         result->type = CPP_AND_EQ;
1347       else
1348         {
1349           BACKUP ();
1350           result->type = CPP_AND;
1351         }
1352       break;
1353
1354     case '|':
1355       c = get_effective_char (pfile);
1356       if (c == '|')
1357         result->type = CPP_OR_OR;
1358       else if (c == '=')
1359         result->type = CPP_OR_EQ;
1360       else
1361         {
1362           BACKUP ();
1363           result->type = CPP_OR;
1364         }
1365       break;
1366
1367     case ':':
1368       c = get_effective_char (pfile);
1369       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1370         result->type = CPP_SCOPE;
1371       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1372         {
1373           result->flags |= DIGRAPH;
1374           result->type = CPP_CLOSE_SQUARE;
1375         }
1376       else
1377         {
1378           BACKUP ();
1379           result->type = CPP_COLON;
1380         }
1381       break;
1382
1383     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1384     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1385     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1386     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1387     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1388
1389     case '~': result->type = CPP_COMPL; break;
1390     case ',': result->type = CPP_COMMA; break;
1391     case '(': result->type = CPP_OPEN_PAREN; break;
1392     case ')': result->type = CPP_CLOSE_PAREN; break;
1393     case '[': result->type = CPP_OPEN_SQUARE; break;
1394     case ']': result->type = CPP_CLOSE_SQUARE; break;
1395     case '{': result->type = CPP_OPEN_BRACE; break;
1396     case '}': result->type = CPP_CLOSE_BRACE; break;
1397     case ';': result->type = CPP_SEMICOLON; break;
1398
1399       /* @ is a punctuator in Objective C.  */
1400     case '@': result->type = CPP_ATSIGN; break;
1401
1402     case '$':
1403       if (CPP_OPTION (pfile, dollars_in_ident))
1404         goto start_ident;
1405       /* Fall through...  */
1406
1407     random_char:
1408     default:
           /* Anything else becomes a CPP_OTHER token carrying the
              character itself.  */
1409       result->type = CPP_OTHER;
1410       result->val.c = c;
1411       break;
1412     }
1413
1414   return result;
1415 }
1416
1417 /* An upper bound on the number of bytes needed to spell TOKEN,
1418    including preceding whitespace.  */
1419 unsigned int
1420 cpp_token_len (token)
1421      const cpp_token *token;
1422 {
1423   unsigned int len;
1424
1425   switch (TOKEN_SPELL (token))
1426     {
1427     default:            len = 0;                                break;
1428     case SPELL_NUMBER:
1429     case SPELL_STRING:  len = token->val.str.len;               break;
1430     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1431     }
1432   /* 1 for whitespace, 4 for comment delimiters.  */
1433   return len + 5;
1434 }
1435
1436 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1437    already contain the enough space to hold the token's spelling.
1438    Returns a pointer to the character after the last character
1439    written.  */
1440 unsigned char *
1441 cpp_spell_token (pfile, token, buffer)
1442      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1443      const cpp_token *token;
1444      unsigned char *buffer;
1445 {
1446   switch (TOKEN_SPELL (token))
1447     {
1448     case SPELL_OPERATOR:
1449       {
1450         const unsigned char *spelling;
1451         unsigned char c;
1452
1453         if (token->flags & DIGRAPH)
1454           spelling
1455             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1456         else if (token->flags & NAMED_OP)
1457           goto spell_ident;
1458         else
1459           spelling = TOKEN_NAME (token);
1460
1461         while ((c = *spelling++) != '\0')
1462           *buffer++ = c;
1463       }
1464       break;
1465
1466     case SPELL_CHAR:
1467       *buffer++ = token->val.c;
1468       break;
1469
1470     spell_ident:
1471     case SPELL_IDENT:
1472       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1473       buffer += NODE_LEN (token->val.node);
1474       break;
1475
1476     case SPELL_NUMBER:
1477       memcpy (buffer, token->val.str.text, token->val.str.len);
1478       buffer += token->val.str.len;
1479       break;
1480
1481     case SPELL_STRING:
1482       {
1483         int left, right, tag;
1484         switch (token->type)
1485           {
1486           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1487           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1488           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1489           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1490           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1491           default:
1492             cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1493             return buffer;
1494           }
1495         if (tag) *buffer++ = tag;
1496         *buffer++ = left;
1497         memcpy (buffer, token->val.str.text, token->val.str.len);
1498         buffer += token->val.str.len;
1499         *buffer++ = right;
1500       }
1501       break;
1502
1503     case SPELL_NONE:
1504       cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
1505       break;
1506     }
1507
1508   return buffer;
1509 }
1510
1511 /* Returns TOKEN spelt as a null-terminated string.  The string is
1512    freed when the reader is destroyed.  Useful for diagnostics.  */
1513 unsigned char *
1514 cpp_token_as_text (pfile, token)
1515      cpp_reader *pfile;
1516      const cpp_token *token;
1517 {
1518   unsigned int len = cpp_token_len (token);
1519   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1520
1521   end = cpp_spell_token (pfile, token, start);
1522   end[0] = '\0';
1523
1524   return start;
1525 }
1526
1527 /* Used by C front ends, which really should move to using
1528    cpp_token_as_text.  */
1529 const char *
1530 cpp_type2name (type)
1531      enum cpp_ttype type;
1532 {
1533   return (const char *) token_spellings[type].name;
1534 }
1535
1536 /* Writes the spelling of token to FP, without any preceding space.
1537    Separated from cpp_spell_token for efficiency - to avoid stdio
1538    double-buffering.  */
1539 void
1540 cpp_output_token (token, fp)
1541      const cpp_token *token;
1542      FILE *fp;
1543 {
1544   switch (TOKEN_SPELL (token))
1545     {
1546     case SPELL_OPERATOR:
1547       {
1548         const unsigned char *spelling;
1549         int c;
1550
1551         if (token->flags & DIGRAPH)
1552           spelling
1553             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1554         else if (token->flags & NAMED_OP)
1555           goto spell_ident;
1556         else
1557           spelling = TOKEN_NAME (token);
1558
1559         c = *spelling;
1560         do
1561           putc (c, fp);
1562         while ((c = *++spelling) != '\0');
1563       }
1564       break;
1565
1566     case SPELL_CHAR:
1567       putc (token->val.c, fp);
1568       break;
1569
1570     spell_ident:
1571     case SPELL_IDENT:
1572       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1573     break;
1574
1575     case SPELL_NUMBER:
1576       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1577       break;
1578
1579     case SPELL_STRING:
1580       {
1581         int left, right, tag;
1582         switch (token->type)
1583           {
1584           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1585           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1586           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1587           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1588           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1589           default:
1590             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1591             return;
1592           }
1593         if (tag) putc (tag, fp);
1594         putc (left, fp);
1595         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1596         putc (right, fp);
1597       }
1598       break;
1599
1600     case SPELL_NONE:
1601       /* An error, most probably.  */
1602       break;
1603     }
1604 }
1605
1606 /* Compare two tokens.  */
1607 int
1608 _cpp_equiv_tokens (a, b)
1609      const cpp_token *a, *b;
1610 {
1611   if (a->type == b->type && a->flags == b->flags)
1612     switch (TOKEN_SPELL (a))
1613       {
1614       default:                  /* Keep compiler happy.  */
1615       case SPELL_OPERATOR:
1616         return 1;
1617       case SPELL_CHAR:
1618         return a->val.c == b->val.c; /* Character.  */
1619       case SPELL_NONE:
1620         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1621       case SPELL_IDENT:
1622         return a->val.node == b->val.node;
1623       case SPELL_NUMBER:
1624       case SPELL_STRING:
1625         return (a->val.str.len == b->val.str.len
1626                 && !memcmp (a->val.str.text, b->val.str.text,
1627                             a->val.str.len));
1628       }
1629
1630   return 0;
1631 }
1632
1633 /* Returns nonzero if a space should be inserted to avoid an
1634    accidental token paste for output.  For simplicity, it is
1635    conservative, and occasionally advises a space where one is not
1636    needed, e.g. "." and ".2".  */
1637 int
1638 cpp_avoid_paste (pfile, token1, token2)
1639      cpp_reader *pfile;
1640      const cpp_token *token1, *token2;
1641 {
1642   enum cpp_ttype a = token1->type, b = token2->type;
1643   cppchar_t c;
1644
1645   if (token1->flags & NAMED_OP)
1646     a = CPP_NAME;
1647   if (token2->flags & NAMED_OP)
1648     b = CPP_NAME;
1649
1650   c = EOF;
1651   if (token2->flags & DIGRAPH)
1652     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1653   else if (token_spellings[b].category == SPELL_OPERATOR)
1654     c = token_spellings[b].name[0];
1655
1656   /* Quickly get everything that can paste with an '='.  */
1657   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1658     return 1;
1659
1660   switch (a)
1661     {
1662     case CPP_GREATER:   return c == '>' || c == '?';
1663     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1664     case CPP_PLUS:      return c == '+';
1665     case CPP_MINUS:     return c == '-' || c == '>';
1666     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1667     case CPP_MOD:       return c == ':' || c == '>';
1668     case CPP_AND:       return c == '&';
1669     case CPP_OR:        return c == '|';
1670     case CPP_COLON:     return c == ':' || c == '>';
1671     case CPP_DEREF:     return c == '*';
1672     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1673     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1674     case CPP_NAME:      return ((b == CPP_NUMBER
1675                                  && name_p (pfile, &token2->val.str))
1676                                 || b == CPP_NAME
1677                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1678     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1679                                 || c == '.' || c == '+' || c == '-');
1680     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1681                                 && token1->val.c == '@'
1682                                 && (b == CPP_NAME || b == CPP_STRING));
1683     default:            break;
1684     }
1685
1686   return 0;
1687 }
1688
1689 /* Output all the remaining tokens on the current line, and a newline
1690    character, to FP.  Leading whitespace is removed.  If there are
1691    macros, special token padding is not performed.  */
1692 void
1693 cpp_output_line (pfile, fp)
1694      cpp_reader *pfile;
1695      FILE *fp;
1696 {
1697   const cpp_token *token;
1698
1699   token = cpp_get_token (pfile);
1700   while (token->type != CPP_EOF)
1701     {
1702       cpp_output_token (token, fp);
1703       token = cpp_get_token (pfile);
1704       if (token->flags & PREV_WHITE)
1705         putc (' ', fp);
1706     }
1707
1708   putc ('\n', fp);
1709 }
1710
/* Return the numeric value of the hexadecimal digit C.  Calls abort
   if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1721
1722 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1723    failure if cpplib is not parsing C++ or C99.  Such failure is
1724    silent, and no variables are updated.  Otherwise returns 0, and
1725    warns if -Wtraditional.
1726
1727    [lex.charset]: The character designated by the universal character
1728    name \UNNNNNNNN is that character whose character short name in
1729    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1730    universal character name \uNNNN is that character whose character
1731    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1732    for a universal character name is less than 0x20 or in the range
1733    0x7F-0x9F (inclusive), or if the universal character name
1734    designates a character in the basic source character set, then the
1735    program is ill-formed.
1736
1737    We assume that wchar_t is Unicode, so we don't need to do any
1738    mapping.  Is this ever wrong?
1739
1740    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1741    LIMIT is the end of the string or charconst.  PSTR is updated to
1742    point after the UCS on return, and the UCS is written into PC.  */
1743
1744 static int
1745 maybe_read_ucs (pfile, pstr, limit, pc)
1746      cpp_reader *pfile;
1747      const unsigned char **pstr;
1748      const unsigned char *limit;
1749      unsigned int *pc;
1750 {
1751   const unsigned char *p = *pstr;
1752   unsigned int code = 0;
1753   unsigned int c = *pc, length;
1754
1755   /* Only attempt to interpret a UCS for C++ and C99.  */
1756   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1757     return 1;
1758
1759   if (CPP_WTRADITIONAL (pfile))
1760     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1761
1762   length = (c == 'u' ? 4: 8);
1763
1764   if ((size_t) (limit - p) < length)
1765     {
1766       cpp_error (pfile, "incomplete universal-character-name");
1767       /* Skip to the end to avoid more diagnostics.  */
1768       p = limit;
1769     }
1770   else
1771     {
1772       for (; length; length--, p++)
1773         {
1774           c = *p;
1775           if (ISXDIGIT (c))
1776             code = (code << 4) + hex_digit_value (c);
1777           else
1778             {
1779               cpp_error (pfile,
1780                          "non-hex digit '%c' in universal-character-name", c);
1781               /* We shouldn't skip in case there are multibyte chars.  */
1782               break;
1783             }
1784         }
1785     }
1786
1787 #ifdef TARGET_EBCDIC
1788   cpp_error (pfile, "universal-character-name on EBCDIC target");
1789   code = 0x3f;  /* EBCDIC invalid character */
1790 #else
1791  /* True extended characters are OK.  */
1792   if (code >= 0xa0
1793       && !(code & 0x80000000)
1794       && !(code >= 0xD800 && code <= 0xDFFF))
1795     ;
1796   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1797      hex escapes so that this also works with EBCDIC hosts.  */
1798   else if (code == 0x24 || code == 0x40 || code == 0x60)
1799     ;
1800   /* Don't give another error if one occurred above.  */
1801   else if (length == 0)
1802     cpp_error (pfile, "universal-character-name out of range");
1803 #endif
1804
1805   *pstr = p;
1806   *pc = code;
1807   return 0;
1808 }
1809
1810 /* Interpret an escape sequence, and return its value.  PSTR points to
1811    the input pointer, which is just after the backslash.  LIMIT is how
1812    much text we have.  MASK is a bitmask for the precision for the
1813    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1814    interpret escapes that did not exist in traditional C.
1815
1816    Handles all relevant diagnostics.  */
1817 unsigned int
1818 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1819      cpp_reader *pfile;
1820      const unsigned char **pstr;
1821      const unsigned char *limit;
1822      unsigned HOST_WIDE_INT mask;
1823      int traditional;
1824 {
1825   int unknown = 0;
1826   const unsigned char *str = *pstr;
1827   unsigned int c = *str++;
1828
1829   switch (c)
1830     {
1831     case '\\': case '\'': case '"': case '?': break;
1832     case 'b': c = TARGET_BS;      break;
1833     case 'f': c = TARGET_FF;      break;
1834     case 'n': c = TARGET_NEWLINE; break;
1835     case 'r': c = TARGET_CR;      break;
1836     case 't': c = TARGET_TAB;     break;
1837     case 'v': c = TARGET_VT;      break;
1838
1839     case '(': case '{': case '[': case '%':
1840       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1841          '\%' is used to prevent SCCS from getting confused.  */
1842       unknown = CPP_PEDANTIC (pfile);
1843       break;
1844
1845     case 'a':
1846       if (CPP_WTRADITIONAL (pfile))
1847         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1848       if (!traditional)
1849         c = TARGET_BELL;
1850       break;
1851
1852     case 'e': case 'E':
1853       if (CPP_PEDANTIC (pfile))
1854         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1855       c = TARGET_ESC;
1856       break;
1857
1858     case 'u': case 'U':
1859       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1860       break;
1861
1862     case 'x':
1863       if (CPP_WTRADITIONAL (pfile))
1864         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1865
1866       if (!traditional)
1867         {
1868           unsigned int i = 0, overflow = 0;
1869           int digits_found = 0;
1870
1871           while (str < limit)
1872             {
1873               c = *str;
1874               if (! ISXDIGIT (c))
1875                 break;
1876               str++;
1877               overflow |= i ^ (i << 4 >> 4);
1878               i = (i << 4) + hex_digit_value (c);
1879               digits_found = 1;
1880             }
1881
1882           if (!digits_found)
1883             cpp_error (pfile, "\\x used with no following hex digits");
1884
1885           if (overflow | (i != (i & mask)))
1886             {
1887               cpp_pedwarn (pfile, "hex escape sequence out of range");
1888               i &= mask;
1889             }
1890           c = i;
1891         }
1892       break;
1893
1894     case '0':  case '1':  case '2':  case '3':
1895     case '4':  case '5':  case '6':  case '7':
1896       {
1897         unsigned int i = c - '0';
1898         int count = 0;
1899
1900         while (str < limit && ++count < 3)
1901           {
1902             c = *str;
1903             if (c < '0' || c > '7')
1904               break;
1905             str++;
1906             i = (i << 3) + c - '0';
1907           }
1908
1909         if (i != (i & mask))
1910           {
1911             cpp_pedwarn (pfile, "octal escape sequence out of range");
1912             i &= mask;
1913           }
1914         c = i;
1915       }
1916       break;
1917
1918     default:
1919       unknown = 1;
1920       break;
1921     }
1922
1923   if (unknown)
1924     {
1925       if (ISGRAPH (c))
1926         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1927       else
1928         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1929     }
1930
1931   if (c > mask)
1932     cpp_pedwarn (pfile, "escape sequence out of range for character");
1933
1934   *pstr = str;
1935   return c;
1936 }
1937
/* Where no MAX_ variant has been provided, fall back on the plain
   CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE values.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1945
1946 /* Interpret a (possibly wide) character constant in TOKEN.
1947    WARN_MULTI warns about multi-character charconsts, if not
1948    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1949    that did not exist in traditional C.  PCHARS_SEEN points to a
1950    variable that is filled in with the number of characters seen.  */
1951 HOST_WIDE_INT
1952 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1953      cpp_reader *pfile;
1954      const cpp_token *token;
1955      int warn_multi;
1956      int traditional;
1957      unsigned int *pchars_seen;
1958 {
1959   const unsigned char *str = token->val.str.text;
1960   const unsigned char *limit = str + token->val.str.len;
1961   unsigned int chars_seen = 0;
1962   unsigned int width, max_chars, c;
1963   unsigned HOST_WIDE_INT mask;
1964   HOST_WIDE_INT result = 0;
1965   bool unsigned_p;
1966
1967 #ifdef MULTIBYTE_CHARS
1968   (void) local_mbtowc (NULL, NULL, 0);
1969 #endif
1970
1971   /* Width in bits.  */
1972   if (token->type == CPP_CHAR)
1973     {
1974       width = MAX_CHAR_TYPE_SIZE;
1975       unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1976     }
1977   else
1978     {
1979       width = MAX_WCHAR_TYPE_SIZE;
1980       unsigned_p = WCHAR_UNSIGNED;
1981     }
1982
1983   if (width < HOST_BITS_PER_WIDE_INT)
1984     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1985   else
1986     mask = ~0;
1987   max_chars = HOST_BITS_PER_WIDE_INT / width;
1988
1989   while (str < limit)
1990     {
1991 #ifdef MULTIBYTE_CHARS
1992       wchar_t wc;
1993       int char_len;
1994
1995       char_len = local_mbtowc (&wc, str, limit - str);
1996       if (char_len == -1)
1997         {
1998           cpp_warning (pfile, "ignoring invalid multibyte character");
1999           c = *str++;
2000         }
2001       else
2002         {
2003           str += char_len;
2004           c = wc;
2005         }
2006 #else
2007       c = *str++;
2008 #endif
2009
2010       if (c == '\\')
2011         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2012
2013 #ifdef MAP_CHARACTER
2014       if (ISPRINT (c))
2015         c = MAP_CHARACTER (c);
2016 #endif
2017
2018       /* Merge character into result; ignore excess chars.  */
2019       if (++chars_seen <= max_chars)
2020         {
2021           if (width < HOST_BITS_PER_WIDE_INT)
2022             result = (result << width) | (c & mask);
2023           else
2024             result = c;
2025         }
2026     }
2027
2028   if (chars_seen == 0)
2029     cpp_error (pfile, "empty character constant");
2030   else if (chars_seen > max_chars)
2031     {
2032       chars_seen = max_chars;
2033       cpp_warning (pfile, "character constant too long");
2034     }
2035   else if (chars_seen > 1 && !traditional && warn_multi)
2036     cpp_warning (pfile, "multi-character character constant");
2037
2038   /* If relevant type is signed, sign-extend the constant.  */
2039   if (chars_seen)
2040     {
2041       unsigned int nbits = chars_seen * width;
2042
2043       mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
2044       if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
2045         result &= mask;
2046       else
2047         result |= ~mask;
2048     }
2049
2050   *pchars_seen = chars_seen;
2051   return result;
2052 }
2053
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that an argument expression of
   low precedence cannot change the meaning of the expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2068
/* Probe structure: the offset of U, which follows a single char, is
   the alignment the compiler requires for a double or a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   meaningful when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) \
  (((size) + ((align) - 1)) & ~((align) - 1))
2081
2082 /* Create a new allocation buffer.  Place the control block at the end
2083    of the buffer, so that buffer overflows will cause immediate chaos.  */
2084 static _cpp_buff *
2085 new_buff (len)
2086      size_t len;
2087 {
2088   _cpp_buff *result;
2089   unsigned char *base;
2090
2091   if (len < MIN_BUFF_SIZE)
2092     len = MIN_BUFF_SIZE;
2093   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2094
2095   base = xmalloc (len + sizeof (_cpp_buff));
2096   result = (_cpp_buff *) (base + len);
2097   result->base = base;
2098   result->cur = base;
2099   result->limit = base + len;
2100   result->next = NULL;
2101   return result;
2102 }
2103
2104 /* Place a chain of unwanted allocation buffers on the free list.  */
2105 void
2106 _cpp_release_buff (pfile, buff)
2107      cpp_reader *pfile;
2108      _cpp_buff *buff;
2109 {
2110   _cpp_buff *end = buff;
2111
2112   while (end->next)
2113     end = end->next;
2114   end->next = pfile->free_buffs;
2115   pfile->free_buffs = buff;
2116 }
2117
2118 /* Return a free buffer of size at least MIN_SIZE.  */
2119 _cpp_buff *
2120 _cpp_get_buff (pfile, min_size)
2121      cpp_reader *pfile;
2122      size_t min_size;
2123 {
2124   _cpp_buff *result, **p;
2125
2126   for (p = &pfile->free_buffs;; p = &(*p)->next)
2127     {
2128       size_t size;
2129
2130       if (*p == NULL)
2131         return new_buff (min_size);
2132       result = *p;
2133       size = result->limit - result->base;
2134       /* Return a buffer that's big enough, but don't waste one that's
2135          way too big.  */
2136       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2137         break;
2138     }
2139
2140   *p = result->next;
2141   result->next = NULL;
2142   result->cur = result->base;
2143   return result;
2144 }
2145
2146 /* Creates a new buffer with enough space to hold the uncommitted
2147    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2148    the excess bytes to the new buffer.  Chains the new buffer after
2149    BUFF, and returns the new buffer.  */
2150 _cpp_buff *
2151 _cpp_append_extend_buff (pfile, buff, min_extra)
2152      cpp_reader *pfile;
2153      _cpp_buff *buff;
2154      size_t min_extra;
2155 {
2156   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2157   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2158
2159   buff->next = new_buff;
2160   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2161   return new_buff;
2162 }
2163
2164 /* Creates a new buffer with enough space to hold the uncommitted
2165    remaining bytes of the buffer pointed to by BUFF, and at least
2166    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2167    Chains the new buffer before the buffer pointed to by BUFF, and
2168    updates the pointer to point to the new buffer.  */
2169 void
2170 _cpp_extend_buff (pfile, pbuff, min_extra)
2171      cpp_reader *pfile;
2172      _cpp_buff **pbuff;
2173      size_t min_extra;
2174 {
2175   _cpp_buff *new_buff, *old_buff = *pbuff;
2176   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2177
2178   new_buff = _cpp_get_buff (pfile, size);
2179   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2180   new_buff->next = old_buff;
2181   *pbuff = new_buff;
2182 }
2183
2184 /* Free a chain of buffers starting at BUFF.  */
2185 void
2186 _cpp_free_buff (buff)
2187      _cpp_buff *buff;
2188 {
2189   _cpp_buff *next;
2190
2191   for (; buff; buff = next)
2192     {
2193       next = buff->next;
2194       free (buff->base);
2195     }
2196 }
2197
2198 /* Allocate permanent, unaligned storage of length LEN.  */
2199 unsigned char *
2200 _cpp_unaligned_alloc (pfile, len)
2201      cpp_reader *pfile;
2202      size_t len;
2203 {
2204   _cpp_buff *buff = pfile->u_buff;
2205   unsigned char *result = buff->cur;
2206
2207   if (len > (size_t) (buff->limit - result))
2208     {
2209       buff = _cpp_get_buff (pfile, len);
2210       buff->next = pfile->u_buff;
2211       pfile->u_buff = buff;
2212       result = buff->cur;
2213     }
2214
2215   buff->cur = result + len;
2216   return result;
2217 }
2218
2219 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2220    That buffer is used for growing allocations when saving macro
2221    replacement lists in a #define, and when parsing an answer to an
2222    assertion in #assert, #unassert or #if (and therefore possibly
2223    whilst expanding macros).  It therefore must not be used by any
2224    code that they might call: specifically the lexer and the guts of
2225    the macro expander.
2226
2227    All existing other uses clearly fit this restriction: storing
2228    registered pragmas during initialization.  */
2229 unsigned char *
2230 _cpp_aligned_alloc (pfile, len)
2231      cpp_reader *pfile;
2232      size_t len;
2233 {
2234   _cpp_buff *buff = pfile->a_buff;
2235   unsigned char *result = buff->cur;
2236
2237   if (len > (size_t) (buff->limit - result))
2238     {
2239       buff = _cpp_get_buff (pfile, len);
2240       buff->next = pfile->a_buff;
2241       pfile->a_buff = buff;
2242       result = buff->cur;
2243     }
2244
2245   buff->cur = result + len;
2246   return result;
2247 }