git.gag.com Git - fw/sdcc/blob - support/cpp2/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27 #include <assert.h>
  28
  29 #ifdef MULTIBYTE_CHARS
  30 #include "mbchar.h"
  31 #include <locale.h>
  32 #endif
  33
/* Spelling categories for token types; each entry of the
   token_spellings table carries one of these in its `category'
   field.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
  45
/* One entry of the token_spellings table: the spelling category of a
   token type plus a fixed string associated with it.  */
struct token_spelling
{
  /* How tokens of this type are spelled.  */
  enum spell_type category;
  /* `U s' for OP table entries, `U STRINGX (e)' for TK entries.  */
  const unsigned char *name;
};
  51
/* Spellings of the digraph forms.  NOTE(review): the code that
   indexes this array is not in this part of the file; confirm the
   order it expects before reordering the entries.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  54
/* Build the spelling table, indexed by token type.  TTYPE_TABLE is
   defined outside this file and is expected to expand to a sequence
   of OP (enumerator, spelling) and TK (enumerator, category)
   invocations; OP and TK exist only while the table is expanded.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Spelling category and table string for TOKEN, looked up by its
   type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read position to the last recorded backup point.
   Requires a variable named `buffer' (a cpp_buffer *) in scope.  */
#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  64
/* Forward declarations of the file-local helpers.  PARAMS is defined
   outside this file and wraps each parameter list.  */

/* Newline, trigraph and escaped-newline handling.  */
static void handle_newline PARAMS ((cpp_reader *));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));

/* Skipping of _asm blocks, comments and whitespace; token parsers.  */
static int skip_asm_block PARAMS ((cpp_reader *));
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
                                  unsigned int *));
static void pedantic_parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static unsigned int copy_text_chars PARAMS ((char *, const char *, unsigned int));
static void save_asm PARAMS ((cpp_reader *, cpp_token *, const uchar *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
                                  cppchar_t));
static bool continue_after_nul PARAMS ((cpp_reader *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
                                   const unsigned char *, cppchar_t *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));

/* Helpers whose definitions lie further down the file.  */
static unsigned int hex_digit_value PARAMS ((unsigned int));
static _cpp_buff *new_buff PARAMS ((size_t));
  94
  95 /* Utility routine:
  96
  97    Compares, the token TOKEN to the NUL-terminated string STRING.
  98    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   if (token->type != CPP_NAME)
 105     return 0;
 106
 107   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 108 }
 109
 110 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 111    Returns with buffer->cur pointing to the character immediately
 112    following the newline (combination).  */
 113 static void
 114 handle_newline (pfile)
 115      cpp_reader *pfile;
 116 {
 117   cpp_buffer *buffer = pfile->buffer;
 118
 119   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 120      only accept CR-LF; maybe we should fall back to that behavior?  */
 121   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 122     buffer->cur++;
 123
 124   buffer->line_base = buffer->cur;
 125   buffer->col_adjust = 0;
 126   pfile->line++;
 127 }
 128
 129 /* Subroutine of skip_escaped_newlines; called when a 3-character
 130    sequence beginning with "??" is encountered.  buffer->cur points to
 131    the second '?'.
 132
 133    Warn if necessary, and returns true if the sequence forms a
 134    trigraph and the trigraph should be honored.  */
 135 static bool
 136 trigraph_p (pfile)
 137      cpp_reader *pfile;
 138 {
 139   cpp_buffer *buffer = pfile->buffer;
 140   cppchar_t from_char = buffer->cur[1];
 141   bool accept;
 142
 143   if (!_cpp_trigraph_map[from_char])
 144     return false;
 145
 146   accept = CPP_OPTION (pfile, trigraphs);
 147
 148   /* Don't warn about trigraphs in comments.  */
 149   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 150     {
 151       if (accept)
 152         cpp_error_with_line (pfile, DL_WARNING,
 153                              pfile->line, CPP_BUF_COL (buffer) - 1,
 154                              "trigraph ??%c converted to %c",
 155                              (int) from_char,
 156                              (int) _cpp_trigraph_map[from_char]);
 157       else if (buffer->cur != buffer->last_Wtrigraphs)
 158         {
 159           buffer->last_Wtrigraphs = buffer->cur;
 160           cpp_error_with_line (pfile, DL_WARNING,
 161                                pfile->line, CPP_BUF_COL (buffer) - 1,
 162                                "trigraph ??%c ignored", (int) from_char);
 163         }
 164     }
 165
 166   return accept;
 167 }
 168
 169 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 170    lie in buffer->cur[-1].  Returns the next byte, which will be in
 171    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 172    2 of the ISO C standard.  */
 173 static cppchar_t
 174 skip_escaped_newlines (pfile)
 175      cpp_reader *pfile;
 176 {
 177   cpp_buffer *buffer = pfile->buffer;
 178   cppchar_t next = buffer->cur[-1];
 179
 180   /* Only do this if we apply stages 1 and 2.  */
 181   if (!buffer->from_stage3)
 182     {
 183       const unsigned char *saved_cur;
 184       cppchar_t next1;
 185
 186       do
 187         {
 188           if (next == '?')
 189             {
 190               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 191                 break;
 192
 193               /* Translate the trigraph.  */
 194               next = _cpp_trigraph_map[buffer->cur[1]];
 195               buffer->cur += 2;
 196               if (next != '\\')
 197                 break;
 198             }
 199
 200           if (buffer->cur == buffer->rlimit)
 201             break;
 202
 203           /* We have a backslash, and room for at least one more
 204              character.  Skip horizontal whitespace.  */
 205           saved_cur = buffer->cur;
 206           do
 207             next1 = *buffer->cur++;
 208           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 209
 210           if (!is_vspace (next1))
 211             {
 212               buffer->cur = saved_cur;
 213               break;
 214             }
 215
 216           if (saved_cur != buffer->cur - 1
 217               && !pfile->state.lexing_comment)
 218             cpp_error (pfile, DL_WARNING,
 219                        "backslash and newline separated by space");
 220
 221           handle_newline (pfile);
 222           buffer->backup_to = buffer->cur;
 223           if (buffer->cur == buffer->rlimit)
 224             {
 225               cpp_error (pfile, DL_PEDWARN,
 226                          "backslash-newline at end of file");
 227               next = EOF;
 228             }
 229           else
 230             next = *buffer->cur++;
 231         }
 232       while (next == '\\' || next == '?');
 233     }
 234
 235   return next;
 236 }
 237
 238 /* Obtain the next character, after trigraph conversion and skipping
 239    an arbitrarily long string of escaped newlines.  The common case of
 240    no trigraphs or escaped newlines falls through quickly.  On return,
 241    buffer->backup_to points to where to return to if the character is
 242    not to be processed.  */
 243 static cppchar_t
 244 get_effective_char (pfile)
 245      cpp_reader *pfile;
 246 {
 247   cppchar_t next;
 248   cpp_buffer *buffer = pfile->buffer;
 249
 250   buffer->backup_to = buffer->cur;
 251   next = *buffer->cur++;
 252   if (__builtin_expect (next == '?' || next == '\\', 0))
 253     next = skip_escaped_newlines (pfile);
 254
 255   return next;
 256 }
 257
 258 /* SDCC _asm specific */
 259 /* Skip an _asm ... _endasm block.  We find the end of the comment by
 260    seeing _endasm.  Returns non-zero if _asm terminated by EOF, zero
 261    otherwise.  */
 262 static int
 263 skip_asm_block (pfile)
 264      cpp_reader *pfile;
 265 {
 266 #define _ENDASM_STR "endasm"
 267 #define _ENDASM_LEN ((sizeof _ENDASM_STR) - 1)
 268
 269   cpp_buffer *buffer = pfile->buffer;
 270   cppchar_t c = EOF;
 271   int prev_space = 0;
 272   int ret = 1;
 273
 274   pfile->state.lexing_comment = 1;
 275   while (buffer->cur != buffer->rlimit)
 276     {
 277       prev_space = is_space(c);
 278       c = *buffer->cur++;
 279
 280       /* FIXME: For speed, create a new character class of characters
 281          of interest inside block comments.  */
 282       if (c == '?' || c == '\\')
 283         c = skip_escaped_newlines (pfile);
 284
 285       if (prev_space && c == '_')
 286         {
 287           if (buffer->cur + _ENDASM_LEN <= buffer->rlimit &&
 288             strncmp(buffer->cur, _ENDASM_STR, _ENDASM_LEN) == 0)
 289             {
 290               buffer->cur += _ENDASM_LEN;
 291               ret = 0;
 292               break;
 293             }
 294         }
 295       else if (is_vspace (c))
 296         {
 297           prev_space = is_space(c);
 298           handle_newline (pfile);
 299         }
 300       else if (c == '\t')
 301         adjust_column (pfile);
 302     }
 303
 304   pfile->state.lexing_comment = 0;
 305   return ret;
 306 }
 307
 308 /* Skip a C-style block comment.  We find the end of the comment by
 309    seeing if an asterisk is before every '/' we encounter.  Returns
 310    nonzero if comment terminated by EOF, zero otherwise.  */
 311 static int
 312 skip_block_comment (pfile)
 313      cpp_reader *pfile;
 314 {
 315   cpp_buffer *buffer = pfile->buffer;
 316   cppchar_t c = EOF, prevc = EOF;
 317
 318   pfile->state.lexing_comment = 1;
 319   while (buffer->cur != buffer->rlimit)
 320     {
 321       prevc = c, c = *buffer->cur++;
 322
 323       /* FIXME: For speed, create a new character class of characters
 324          of interest inside block comments.  */
 325       if (c == '?' || c == '\\')
 326         c = skip_escaped_newlines (pfile);
 327
 328       /* People like decorating comments with '*', so check for '/'
 329          instead for efficiency.  */
 330       if (c == '/')
 331         {
 332           if (prevc == '*')
 333             break;
 334
 335           /* Warn about potential nested comments, but not if the '/'
 336              comes immediately before the true comment delimiter.
 337              Don't bother to get it right across escaped newlines.  */
 338           if (CPP_OPTION (pfile, warn_comments)
 339               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 340             cpp_error_with_line (pfile, DL_WARNING,
 341                                  pfile->line, CPP_BUF_COL (buffer),
 342                                  "\"/*\" within comment");
 343         }
 344       else if (is_vspace (c))
 345         handle_newline (pfile);
 346       else if (c == '\t')
 347         adjust_column (pfile);
 348     }
 349
 350   pfile->state.lexing_comment = 0;
 351   return c != '/' || prevc != '*';
 352 }
 353
 354 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 355    terminating newline.  Handles escaped newlines.  Returns nonzero
 356    if a multiline comment.  */
 357 static int
 358 skip_line_comment (pfile)
 359      cpp_reader *pfile;
 360 {
 361   cpp_buffer *buffer = pfile->buffer;
 362   unsigned int orig_line = pfile->line;
 363   cppchar_t c;
 364 #ifdef MULTIBYTE_CHARS
 365   wchar_t wc;
 366   int char_len;
 367 #endif
 368
 369   pfile->state.lexing_comment = 1;
 370 #ifdef MULTIBYTE_CHARS
 371   /* Reset multibyte conversion state.  */
 372   (void) local_mbtowc (NULL, NULL, 0);
 373 #endif
 374   do
 375     {
 376       if (buffer->cur == buffer->rlimit)
 377         goto at_eof;
 378
 379 #ifdef MULTIBYTE_CHARS
 380       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 381                                buffer->rlimit - buffer->cur);
 382       if (char_len == -1)
 383         {
 384           cpp_error (pfile, DL_WARNING,
 385                      "ignoring invalid multibyte character");
 386           char_len = 1;
 387           c = *buffer->cur++;
 388         }
 389       else
 390         {
 391           buffer->cur += char_len;
 392           c = wc;
 393         }
 394 #else
 395       c = *buffer->cur++;
 396 #endif
 397       if (c == '?' || c == '\\')
 398         c = skip_escaped_newlines (pfile);
 399     }
 400   while (!is_vspace (c));
 401
 402   /* Step back over the newline, except at EOF.  */
 403   buffer->cur--;
 404  at_eof:
 405
 406   pfile->state.lexing_comment = 0;
 407   return orig_line != pfile->line;
 408 }
 409
 410 /* pfile->buffer->cur is one beyond the \t character.  Update
 411    col_adjust so we track the column correctly.  */
 412 static void
 413 adjust_column (pfile)
 414      cpp_reader *pfile;
 415 {
 416   cpp_buffer *buffer = pfile->buffer;
 417   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 418
 419   /* Round it up to multiple of the tabstop, but subtract 1 since the
 420      tab itself occupies a character position.  */
 421   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 422                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 423 }
 424
 425 /* Skips whitespace, saving the next non-whitespace character.
 426    Adjusts pfile->col_adjust to account for tabs.  Without this,
 427    tokens might be assigned an incorrect column.  */
 428 static int
 429 skip_whitespace (pfile, c)
 430      cpp_reader *pfile;
 431      cppchar_t c;
 432 {
 433   cpp_buffer *buffer = pfile->buffer;
 434   unsigned int warned = 0;
 435
 436   do
 437     {
 438       /* Horizontal space always OK.  */
 439       if (c == ' ')
 440         ;
 441       else if (c == '\t')
 442         adjust_column (pfile);
 443       /* Just \f \v or \0 left.  */
 444       else if (c == '\0')
 445         {
 446           if (buffer->cur - 1 == buffer->rlimit)
 447             return 0;
 448           if (!warned)
 449             {
 450               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 451               warned = 1;
 452             }
 453         }
 454       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 455         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 456                              CPP_BUF_COL (buffer),
 457                              "%s in preprocessing directive",
 458                              c == '\f' ? "form feed" : "vertical tab");
 459
 460       c = *buffer->cur++;
 461     }
 462   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 463   while (is_nvspace (c));
 464
 465   buffer->cur--;
 466   return 1;
 467 }
 468
 469 /* See if the characters of a number token are valid in a name (no
 470    '.', '+' or '-').  */
 471 static int
 472 name_p (pfile, string)
 473      cpp_reader *pfile;
 474      const cpp_string *string;
 475 {
 476   unsigned int i;
 477
 478   for (i = 0; i < string->len; i++)
 479     if (!is_idchar (string->text[i]))
 480       return 0;
 481
 482   return 1;
 483 }
 484
 485 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 486    a critical inner loop.  The common case is an identifier which has
 487    not been split by backslash-newline, does not contain a dollar
 488    sign, and has already been scanned (roughly 10:1 ratio of
 489    seen:unseen identifiers in normal code; the distribution is
 490    Poisson-like).  Second most common case is a new identifier, not
 491    split and no dollar sign.  The other possibilities are rare and
 492    have been relegated to parse_slow.  */
 493 static cpp_hashnode *
 494 parse_identifier (pfile)
 495      cpp_reader *pfile;
 496 {
 497   cpp_hashnode *result;
 498   const uchar *cur, *base;
 499
 500   /* Fast-path loop.  Skim over a normal identifier.
 501      N.B. ISIDNUM does not include $.  */
 502   cur = pfile->buffer->cur;
 503   while (ISIDNUM (*cur))
 504     cur++;
 505
 506   /* Check for slow-path cases.  */
 507   if (*cur == '?' || *cur == '\\' || *cur == '$')
 508     {
 509       unsigned int len;
 510
 511       base = parse_slow (pfile, cur, 0, &len);
 512       result = (cpp_hashnode *)
 513         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 514     }
 515   else
 516     {
 517       base = pfile->buffer->cur - 1;
 518       pfile->buffer->cur = cur;
 519       result = (cpp_hashnode *)
 520         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 521     }
 522
 523   /* Rarely, identifiers require diagnostics when lexed.
 524      XXX Has to be forced out of the fast path.  */
 525   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 526                         && !pfile->state.skipping, 0))
 527     {
 528       /* It is allowed to poison the same identifier twice.  */
 529       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 530         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 531                    NODE_NAME (result));
 532
 533       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 534          replacement list of a variadic macro.  */
 535       if (result == pfile->spec_nodes.n__VA_ARGS__
 536           && !pfile->state.va_args_ok)
 537         cpp_error (pfile, DL_PEDWARN,
 538         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 539     }
 540
 541   return result;
 542 }
 543
 544 /* Slow path.  This handles numbers and identifiers which have been
 545    split, or contain dollar signs.  The part of the token from
 546    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 547    1 if it's a number, and 2 if it has a leading period.  Returns a
 548    pointer to the token's NUL-terminated spelling in permanent
 549    storage, and sets PLEN to its length.  */
 550 static uchar *
 551 parse_slow (pfile, cur, number_p, plen)
 552      cpp_reader *pfile;
 553      const uchar *cur;
 554      int number_p;
 555      unsigned int *plen;
 556 {
 557   cpp_buffer *buffer = pfile->buffer;
 558   const uchar *base = buffer->cur - 1;
 559   struct obstack *stack = &pfile->hash_table->stack;
 560   unsigned int c, prevc, saw_dollar = 0;
 561
 562   /* Place any leading period.  */
 563   if (number_p == 2)
 564     obstack_1grow (stack, '.');
 565
 566   /* Copy the part of the token which is known to be okay.  */
 567   obstack_grow (stack, base, cur - base);
 568
 569   /* Now process the part which isn't.  We are looking at one of
 570      '$', '\\', or '?' on entry to this loop.  */
 571   prevc = cur[-1];
 572   c = *cur++;
 573   buffer->cur = cur;
 574   for (;;)
 575     {
 576       /* Potential escaped newline?  */
 577       buffer->backup_to = buffer->cur - 1;
 578       if (c == '?' || c == '\\')
 579         c = skip_escaped_newlines (pfile);
 580
 581       if (!is_idchar (c))
 582         {
 583           if (!number_p)
 584             break;
 585           if (c != '.' && !VALID_SIGN (c, prevc))
 586             break;
 587         }
 588
 589       /* Handle normal identifier characters in this loop.  */
 590       do
 591         {
 592           prevc = c;
 593           obstack_1grow (stack, c);
 594
 595           if (c == '$')
 596             saw_dollar++;
 597
 598           c = *buffer->cur++;
 599         }
 600       while (is_idchar (c));
 601     }
 602
 603   /* Step back over the unwanted char.  */
 604   BACKUP ();
 605
 606   /* $ is not an identifier character in the standard, but is commonly
 607      accepted as an extension.  Don't warn about it in skipped
 608      conditional blocks.  */
 609   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 610     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 611
 612   /* Identifiers and numbers are null-terminated.  */
 613   *plen = obstack_object_size (stack);
 614   obstack_1grow (stack, '\0');
 615   return obstack_finish (stack);
 616 }
 617
 618 /* SDCC specific */
 619 /* Pedantic parse a number, beginning with character C, skipping embedded
 620    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 621    before C.  Place the result in NUMBER.  */
 622 static void
 623 pedantic_parse_number (pfile, number, leading_period)
 624      cpp_reader *pfile;
 625      cpp_string *number;
 626      int leading_period;
 627 {
 628   enum num_type_e { NT_DEC, NT_HEX } num_type = NT_DEC;
 629   enum num_part_e { NP_WHOLE, NP_FRACT, NP_EXP, NP_INT_SUFFIX, NP_FLOAT_SUFFIX } num_part = NP_WHOLE;
 630
 631   uchar c = *(pfile->buffer->cur - 1);
 632   struct obstack *stack = &pfile->hash_table->stack;
 633   cpp_buffer *buffer = pfile->buffer;
 634   int len = 0;
 635   int has_whole = 0;
 636   int has_fract = 0;
 637
 638   if (leading_period)
 639     {
 640       num_part = NP_FRACT;
 641       ++len;
 642       obstack_1grow (stack, '.');
 643       c = get_effective_char(pfile);
 644     }
 645   else
 646     {
 647       if ('0' == c)
 648         {
 649           has_whole = 1;
 650           ++len;
 651           obstack_1grow (stack, c);
 652           c = get_effective_char(pfile);
 653
 654           switch (c)
 655             {
 656             case 'X':
 657             case 'x':
 658               num_type = NT_HEX;
 659               ++len;
 660               obstack_1grow (stack, c);
 661               c = get_effective_char(pfile);
 662               break;
 663
 664             case '.':
 665               num_part = NP_FRACT;
 666               ++len;
 667               obstack_1grow (stack, c);
 668               c = get_effective_char(pfile);
 669               break;
 670             }
 671         }
 672     }
 673
 674   for (; ; )
 675     {
 676       switch (num_part)
 677         {
 678         case NP_WHOLE:
 679           if (NT_DEC == num_type)
 680             {
 681               while (ISDIGIT (c))
 682                 {
 683                   has_whole = 1;
 684                   ++len;
 685                   obstack_1grow (stack, c);
 686                   c = get_effective_char(pfile);
 687                 }
 688
 689               if ('.' == c)
 690                 {
 691                   num_part = NP_FRACT;
 692                   ++len;
 693                   obstack_1grow (stack, c);
 694                   c = get_effective_char(pfile);
 695                   continue;
 696                 }
 697               else if ('E' == c || 'e' == c)
 698                 {
 699                   if (has_whole || has_fract)
 700                   {
 701                     num_part = NP_EXP;
 702                     ++len;
 703                     obstack_1grow (stack, c);
 704                     c = get_effective_char(pfile);
 705                     continue;
 706                   }
 707                   else
 708                     break;
 709                 }
 710             }
 711           else
 712             {
 713               while (ISXDIGIT (c))
 714                 {
 715                   has_whole = 1;
 716                   ++len;
 717                   obstack_1grow (stack, c);
 718                   c = get_effective_char(pfile);
 719                 }
 720
 721               if ('.' == c)
 722                 {
 723                   num_part = NP_FRACT;
 724                   ++len;
 725                   obstack_1grow (stack, c);
 726                   c = get_effective_char(pfile);
 727                   continue;
 728                 }
 729               else if ('P' == c || 'p' == c)
 730                 {
 731                   if (has_whole || has_fract)
 732                     {
 733                       num_part = NP_EXP;
 734                       ++len;
 735                       obstack_1grow (stack, c);
 736                       c = get_effective_char(pfile);
 737                       continue;
 738                     }
 739                   else
 740                     break;
 741                 }
 742             }
 743           num_part = NP_INT_SUFFIX;
 744           continue;
 745
 746         case NP_FRACT:
 747           if (NT_DEC == num_type)
 748             {
 749               while (ISDIGIT (c))
 750                 {
 751                   has_fract = 1;
 752                   ++len;
 753                   obstack_1grow (stack, c);
 754                   c = get_effective_char(pfile);
 755                 }
 756
 757               if ('E' == c || 'e' == c)
 758                 {
 759                   if (has_whole || has_fract)
 760                     {
 761                       num_part = NP_EXP;
 762                       ++len;
 763                       obstack_1grow (stack, c);
 764                       c = get_effective_char(pfile);
 765                       continue;
 766                     }
 767                 }
 768             }
 769           else
 770             {
 771               while (ISXDIGIT (c))
 772                 {
 773                   has_fract = 1;
 774                   ++len;
 775                   obstack_1grow (stack, c);
 776                   c = get_effective_char(pfile);
 777                 }
 778
 779               if ('P' == c || 'p' == c)
 780                 {
 781                   if (has_whole || has_fract)
 782                     {
 783                       num_part = NP_EXP;
 784                       ++len;
 785                       obstack_1grow (stack, c);
 786                       c = get_effective_char(pfile);
 787                       continue;
 788                     }
 789                 }
 790             }
 791           num_part = NP_FLOAT_SUFFIX;
 792           continue;
 793
 794         case NP_EXP:
 795           if ('+' == c || '-' == c)
 796             {
 797               ++len;
 798               obstack_1grow (stack, c);
 799               c = get_effective_char(pfile);
 800             }
 801
 802           while (ISDIGIT (c))
 803             {
 804               ++len;
 805               obstack_1grow (stack, c);
 806               c = get_effective_char(pfile);
 807             }
 808
 809           num_part = NP_FLOAT_SUFFIX;
 810           continue;
 811
 812         case NP_INT_SUFFIX:
 813            if ('L' == c || 'l' == c)
 814             {
 815               uchar prevc = c;
 816
 817               ++len;
 818               obstack_1grow (stack, c);
 819               c = get_effective_char(pfile);
 820
 821               if (c == prevc)
 822                 {
 823                   ++len;
 824                   obstack_1grow (stack, c);
 825                   c = get_effective_char(pfile);
 826                 }
 827             }
 828           else if ('U' == c || 'u' == c)
 829             {
 830               ++len;
 831               obstack_1grow (stack, c);
 832               c = get_effective_char(pfile);
 833             }
 834           break;
 835
 836         case NP_FLOAT_SUFFIX:
 837            if ('F' == c || 'f' == c)
 838             {
 839               ++len;
 840               obstack_1grow (stack, c);
 841               c = get_effective_char(pfile);
 842             }
 843           else if ('L' == c || 'l' == c)
 844             {
 845               ++len;
 846               obstack_1grow (stack, c);
 847               c = get_effective_char(pfile);
 848             }
 849           break;
 850         }
 851       break;
 852     }
 853
 854   /* Step back over the unwanted char.  */
 855   BACKUP ();
 856
 857   number->text = obstack_finish (stack);
 858   number->len = len;
 859 }
 860
 861 /* Parse a number, beginning with character C, skipping embedded
 862    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 863    before C.  Place the result in NUMBER.  */
 864 static void
 865 parse_number (pfile, number, leading_period)
 866      cpp_reader *pfile;
 867      cpp_string *number;
 868      int leading_period;
 869 {
 870   const uchar *cur;
 871
 872   /* Fast-path loop.  Skim over a normal number.
 873      N.B. ISIDNUM does not include $.  */
 874   cur = pfile->buffer->cur;
 875   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 876     cur++;
 877
 878   /* Check for slow-path cases.  */
 879   if (*cur == '?' || *cur == '\\' || *cur == '$')
 880     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 881   else
 882     {
 883       const uchar *base = pfile->buffer->cur - 1;
 884       uchar *dest;
 885
 886       number->len = cur - base + leading_period;
 887       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 888       dest[number->len] = '\0';
 889       number->text = dest;
 890
 891       if (leading_period)
 892         *dest++ = '.';
 893       memcpy (dest, base, cur - base);
 894       pfile->buffer->cur = cur;
 895     }
 896 }
 897
 898 /* Subroutine of parse_string.  */
 899 static int
 900 unescaped_terminator_p (pfile, dest)
 901      cpp_reader *pfile;
 902      const unsigned char *dest;
 903 {
 904   const unsigned char *start, *temp;
 905
 906   /* In #include-style directives, terminators are not escapeable.  */
 907   if (pfile->state.angled_headers)
 908     return 1;
 909
 910   start = BUFF_FRONT (pfile->u_buff);
 911
 912   /* An odd number of consecutive backslashes represents an escaped
 913      terminator.  */
 914   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 915     ;
 916
 917   return ((dest - temp) & 1) == 0;
 918 }
 919
 920 /* Parses a string, character constant, or angle-bracketed header file
 921    name.  Handles embedded trigraphs and escaped newlines.  The stored
 922    string is guaranteed NUL-terminated, but it is not guaranteed that
 923    this is the first NUL since embedded NULs are preserved.
 924
 925    When this function returns, buffer->cur points to the next
 926    character to be processed.  */
 927 static void
 928 parse_string (pfile, token, terminator)
 929      cpp_reader *pfile;
 930      cpp_token *token;
 931      cppchar_t terminator;
 932 {
 933   cpp_buffer *buffer = pfile->buffer;
 934   unsigned char *dest, *limit;
 935   cppchar_t c;
 936   bool warned_nulls = false;
 937 #ifdef MULTIBYTE_CHARS
 938   wchar_t wc;
 939   int char_len;
 940 #endif
 941
 942   dest = BUFF_FRONT (pfile->u_buff);
 943   limit = BUFF_LIMIT (pfile->u_buff);
 944
 945 #ifdef MULTIBYTE_CHARS
 946   /* Reset multibyte conversion state.  */
 947   (void) local_mbtowc (NULL, NULL, 0);
 948 #endif
 949   for (;;)
 950     {
 951       /* We need room for another char, possibly the terminating NUL.  */
 952       if ((size_t) (limit - dest) < 1)
 953         {
 954           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 955           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 956           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 957           limit = BUFF_LIMIT (pfile->u_buff);
 958         }
 959
 960 #ifdef MULTIBYTE_CHARS
 961       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 962                                buffer->rlimit - buffer->cur);
 963       if (char_len == -1)
 964         {
 965           cpp_error (pfile, DL_WARNING,
 966                      "ignoring invalid multibyte character");
 967           char_len = 1;
 968           c = *buffer->cur++;
 969         }
 970       else
 971         {
 972           buffer->cur += char_len;
 973           c = wc;
 974         }
 975 #else
 976       c = *buffer->cur++;
 977 #endif
 978
 979       /* Handle trigraphs, escaped newlines etc.  */
 980       if (c == '?' || c == '\\')
 981         c = skip_escaped_newlines (pfile);
 982
 983       if (c == terminator)
 984         {
 985           if (unescaped_terminator_p (pfile, dest))
 986             break;
 987         }
 988       else if (is_vspace (c))
 989         {
 990           /* No string literal may extend over multiple lines.  In
 991              assembly language, suppress the error except for <>
 992              includes.  This is a kludge around not knowing where
 993              comments are.  */
 994         unterminated:
 995           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 996             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 997                        (int) terminator);
 998           buffer->cur--;
 999           break;
1000         }
1001       else if (c == '\0')
1002         {
1003           if (buffer->cur - 1 == buffer->rlimit)
1004             goto unterminated;
1005           if (!warned_nulls)
1006             {
1007               warned_nulls = true;
1008               cpp_error (pfile, DL_WARNING,
1009                          "null character(s) preserved in literal");
1010             }
1011         }
1012 #ifdef MULTIBYTE_CHARS
1013       if (char_len > 1)
1014         {
1015           for ( ; char_len > 0; --char_len)
1016             *dest++ = (*buffer->cur - char_len);
1017         }
1018       else
1019 #endif
1020         *dest++ = c;
1021     }
1022
1023   *dest = '\0';
1024
1025   token->val.str.text = BUFF_FRONT (pfile->u_buff);
1026   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
1027   BUFF_FRONT (pfile->u_buff) = dest + 1;
1028 }
1029
1030 /* Fixed _WIN32 problem with CR-CR-LF sequences when outputting
1031    comment blocks (when executed with -C option) and
1032    _asm (SDCPP specific) blocks */
1033
/* Copy the LEN characters starting at SRC to DEST, leaving out every
   carriage return, and return the number of characters kept.  CRs are
   dropped because the output is opened in TEXT mode, which generates
   them automatically.  When DEST is NULL nothing is stored and the
   characters are only counted.  The input must not contain NULs.  */
static unsigned int
copy_text_chars (dest, src, len)
     char *dest;
     const char *src;
     unsigned int len;
{
  unsigned int kept = 0;
  unsigned int i;

  for (i = 0; i < len; i++)
    {
      const char ch = src[i];

      assert (ch != '\0');

      if (ch == '\r')
        continue;

      if (dest != NULL)
        dest[kept] = ch;
      kept++;
    }

  return kept;
}
1060
1061 /* SDCC _asm specific */
1062 /* The stored comment includes the comment start and any terminator.  */
1063 static void
1064 save_asm (pfile, token, from)
1065      cpp_reader *pfile;
1066      cpp_token *token;
1067      const unsigned char *from;
1068 {
1069 #define _ASM_STR  "_asm"
1070 #define _ASM_LEN  ((sizeof _ASM_STR) - 1)
1071
1072   unsigned char *buffer;
1073   unsigned int text_len, len;
1074
1075   len = pfile->buffer->cur - from;
1076   /* + _ASM_LEN for the initial '_asm'.  */
1077   text_len = copy_text_chars (NULL, from, len) + _ASM_LEN;
1078   buffer = _cpp_unaligned_alloc (pfile, text_len);
1079
1080
1081   token->type = CPP_ASM;
1082   token->val.str.len = text_len;
1083   token->val.str.text = buffer;
1084
1085   memcpy (buffer, _ASM_STR, _ASM_LEN);
1086   copy_text_chars (buffer + _ASM_LEN, from, len);
1087 }
1088
1089 /* The stored comment includes the comment start and any terminator.  */
1090 static void
1091 save_comment (pfile, token, from, type)
1092      cpp_reader *pfile;
1093      cpp_token *token;
1094      const unsigned char *from;
1095      cppchar_t type;
1096 {
1097   unsigned char *buffer;
1098   unsigned int len, clen;
1099
1100   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1101
1102   /* C++ comments probably (not definitely) have moved past a new
1103      line, which we don't want to save in the comment.  */
1104   if (is_vspace (pfile->buffer->cur[-1]))
1105     len--;
1106
1107   /* If we are currently in a directive, then we need to store all
1108      C++ comments as C comments internally, and so we need to
1109      allocate a little extra space in that case.
1110
1111      Note that the only time we encounter a directive here is
1112      when we are saving comments in a "#define".  */
1113   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1114
1115   buffer = _cpp_unaligned_alloc (pfile, clen);
1116
1117   token->type = CPP_COMMENT;
1118   token->val.str.len = clen;
1119   token->val.str.text = buffer;
1120
1121   buffer[0] = '/';
1122   copy_text_chars (buffer + 1, from, len);
1123
1124   /* Finish conversion to a C comment, if necessary.  */
1125   if (pfile->state.in_directive && type == '/')
1126     {
1127       buffer[1] = '*';
1128       buffer[clen - 2] = '*';
1129       buffer[clen - 1] = '/';
1130     }
1131 }
1132
1133 /* Allocate COUNT tokens for RUN.  */
1134 void
1135 _cpp_init_tokenrun (run, count)
1136      tokenrun *run;
1137      unsigned int count;
1138 {
1139   run->base = xnewvec (cpp_token, count);
1140   run->limit = run->base + count;
1141   run->next = NULL;
1142 }
1143
1144 /* Returns the next tokenrun, or creates one if there is none.  */
1145 static tokenrun *
1146 next_tokenrun (run)
1147      tokenrun *run;
1148 {
1149   if (run->next == NULL)
1150     {
1151       run->next = xnew (tokenrun);
1152       run->next->prev = run;
1153       _cpp_init_tokenrun (run->next, 250);
1154     }
1155
1156   return run->next;
1157 }
1158
1159 /* Allocate a single token that is invalidated at the same time as the
1160    rest of the tokens on the line.  Has its line and col set to the
1161    same as the last lexed token, so that diagnostics appear in the
1162    right place.  */
1163 cpp_token *
1164 _cpp_temp_token (pfile)
1165      cpp_reader *pfile;
1166 {
1167   cpp_token *old, *result;
1168
1169   old = pfile->cur_token - 1;
1170   if (pfile->cur_token == pfile->cur_run->limit)
1171     {
1172       pfile->cur_run = next_tokenrun (pfile->cur_run);
1173       pfile->cur_token = pfile->cur_run->base;
1174     }
1175
1176   result = pfile->cur_token++;
1177   result->line = old->line;
1178   result->col = old->col;
1179   return result;
1180 }
1181
1182 /* Lex a token into RESULT (external interface).  Takes care of issues
1183    like directive handling, token lookahead, multiple include
1184    optimization and skipping.  */
1185 const cpp_token *
1186 _cpp_lex_token (pfile)
1187      cpp_reader *pfile;
1188 {
1189   cpp_token *result;
1190
1191   for (;;)
1192     {
1193       if (pfile->cur_token == pfile->cur_run->limit)
1194         {
1195           pfile->cur_run = next_tokenrun (pfile->cur_run);
1196           pfile->cur_token = pfile->cur_run->base;
1197         }
1198
1199       if (pfile->lookaheads)
1200         {
1201           pfile->lookaheads--;
1202           result = pfile->cur_token++;
1203         }
1204       else
1205         result = _cpp_lex_direct (pfile);
1206
1207       if (result->flags & BOL)
1208         {
1209           /* Is this a directive.  If _cpp_handle_directive returns
1210              false, it is an assembler #.  */
1211           if (result->type == CPP_HASH
1212               /* 6.10.3 p 11: Directives in a list of macro arguments
1213                  gives undefined behavior.  This implementation
1214                  handles the directive as normal.  */
1215               && pfile->state.parsing_args != 1
1216               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1217             continue;
1218           if (pfile->cb.line_change && !pfile->state.skipping)
1219             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1220         }
1221
1222       /* We don't skip tokens in directives.  */
1223       if (pfile->state.in_directive)
1224         break;
1225
1226       /* Outside a directive, invalidate controlling macros.  At file
1227          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1228          get here and MI optimisation works.  */
1229       pfile->mi_valid = false;
1230
1231       if (!pfile->state.skipping || result->type == CPP_EOF)
1232         break;
1233     }
1234
1235   return result;
1236 }
1237
1238 /* A NUL terminates the current buffer.  For ISO preprocessing this is
1239    EOF, but for traditional preprocessing it indicates we need a line
1240    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
1241    to return a CPP_EOF to the caller.  */
1242 static bool
1243 continue_after_nul (pfile)
1244      cpp_reader *pfile;
1245 {
1246   cpp_buffer *buffer = pfile->buffer;
1247   bool more = false;
1248
1249   buffer->saved_flags = BOL;
1250   if (CPP_OPTION (pfile, traditional))
1251     {
1252       if (pfile->state.in_directive)
1253         return false;
1254
1255       _cpp_remove_overlay (pfile);
1256       more = _cpp_read_logical_line_trad (pfile);
1257       _cpp_overlay_buffer (pfile, pfile->out.base,
1258                            pfile->out.cur - pfile->out.base);
1259       pfile->line = pfile->out.first_line;
1260     }
1261   else
1262     {
1263       /* Stop parsing arguments with a CPP_EOF.  When we finally come
1264          back here, do the work of popping the buffer.  */
1265       if (!pfile->state.parsing_args)
1266         {
1267           if (buffer->cur != buffer->line_base)
1268             {
1269               /* Non-empty files should end in a newline.  Don't warn
1270                  for command line and _Pragma buffers.  */
1271               if (!buffer->from_stage3)
1272                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1273               handle_newline (pfile);
1274             }
1275
1276           /* Similarly, finish an in-progress directive with CPP_EOF
1277              before popping the buffer.  */
1278           if (!pfile->state.in_directive && buffer->prev)
1279             {
1280               more = !buffer->return_at_eof;
1281               _cpp_pop_buffer (pfile);
1282             }
1283         }
1284     }
1285
1286   return more;
1287 }
1288
/* IF_NEXT_IS (CHAR, THEN_TYPE, ELSE_TYPE): read the next character
   with get_effective_char (pfile).  If it equals CHAR, set
   result->type to THEN_TYPE; otherwise invoke BACKUP () and set
   result->type to ELSE_TYPE.  The macro uses the identifiers `pfile'
   and `result' of the place where it is expanded, and is wrapped in
   do { } while (0) so that it can be used as a single statement.  */
1289 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
1290   do {                                          \
1291     if (get_effective_char (pfile) == CHAR)     \
1292       result->type = THEN_TYPE;                 \
1293     else                                        \
1294       {                                         \
1295         BACKUP ();                              \
1296         result->type = ELSE_TYPE;               \
1297       }                                         \
1298   } while (0)
1299
1300 /* Lex a token into pfile->cur_token, which is also incremented, to
1301    get diagnostics pointing to the correct location.
1302
1303    Does not handle issues such as token lookahead, multiple-include
1304    optimisation, directives, skipping etc.  This function is only
1305    suitable for use by _cpp_lex_token, and in special cases like
1306    lex_expansion_token which doesn't care for any of these issues.
1307
1308    When meeting a newline, returns CPP_EOF if parsing a directive,
1309    otherwise returns to the start of the token buffer if permissible.
1310    Returns the location of the lexed token.

        PFILE supplies the buffer to read (pfile->buffer) and the token
        slot to fill.  The switch dispatches on the first character of
        the token.  Multi-character punctuators look one character
        ahead with get_effective_char and hand an unwanted character
        back with BACKUP () (defined elsewhere; presumably it un-reads
        that character).  Numbers, identifiers and literals are
        delegated to parse_number / pedantic_parse_number,
        parse_identifier and parse_string.  A comment is either skipped,
        in which case the following token is flagged PREV_WHITE, or,
        when pfile->state.save_comments is set, stored by save_comment
        as a token of its own.  */
1311 cpp_token *
1312 _cpp_lex_direct (pfile)
1313      cpp_reader *pfile;
1314 {
1315   cppchar_t c;
1316   cpp_buffer *buffer;
1317   const unsigned char *comment_start;
       /* Claim the next token slot; this is the token that gets filled
          in and returned (unless a newline resets it, see below).  */
1318   cpp_token *result = pfile->cur_token++;
1319
       /* Start of a line or of a different buffer: pfile->buffer is
          read again because continue_after_nul may have popped the
          previous one, and the flags saved for the first token of the
          line are picked up.  */
1320  fresh_line:
1321   buffer = pfile->buffer;
1322   result->flags = buffer->saved_flags;
1323   buffer->saved_flags = 0;
       /* Also entered after a skipped comment or an escaped newline, so
          that the token records the line it really starts on.  */
1324  update_tokens_line:
1325   result->line = pfile->line;
1326
       /* Also entered after a run of horizontal whitespace.  */
1327  skipped_white:
1328   c = *buffer->cur++;
1329   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1330
       /* Re-entered from the '?' case with C replaced by the character
          that skip_escaped_newlines returned.  */
1331  trigraph:
1332   switch (c)
1333     {
1334     case ' ': case '\t': case '\f': case '\v': case '\0':
1335       result->flags |= PREV_WHITE;
1336       if (skip_whitespace (pfile, c))
1337         goto skipped_white;
1338
1339       /* End of buffer.  */
1340       buffer->cur--;
1341       if (continue_after_nul (pfile))
1342         goto fresh_line;
1343       result->type = CPP_EOF;
1344       break;
1345
1346     case '\n': case '\r':
1347       handle_newline (pfile);
1348       buffer->saved_flags = BOL;
1349       if (! pfile->state.in_directive)
1350         {
1351           if (pfile->state.parsing_args == 2)
1352             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens have to be kept, start again at the first
                  slot of the base run and lex the next line into it.  */
1353           if (!pfile->keep_tokens)
1354             {
1355               pfile->cur_run = &pfile->base_run;
1356               result = pfile->base_run.base;
1357               pfile->cur_token = result + 1;
1358             }
1359           goto fresh_line;
1360         }
           /* Inside a directive the end of the line ends the directive.  */
1361       result->type = CPP_EOF;
1362       break;
1363
1364     case '?':
1365     case '\\':
1366       /* These could start an escaped newline, or '?' a trigraph.  Let
1367          skip_escaped_newlines do all the work.  */
1368       {
1369         unsigned int line = pfile->line;
1370
1371         c = skip_escaped_newlines (pfile);
1372         if (line != pfile->line)
1373           {
1374             buffer->cur--;
1375             /* We had at least one escaped newline of some sort.
1376                Update the token's line and column.  */
1377             goto update_tokens_line;
1378           }
1379       }
1380
1381       /* We are either the original '?' or '\\', or a trigraph.  */
1382       if (c == '?')
1383         result->type = CPP_QUERY;
1384       else if (c == '\\')
1385         goto random_char;
1386       else
1387         goto trigraph;
1388       break;
1389
1390     case '0': case '1': case '2': case '3': case '4':
1391     case '5': case '6': case '7': case '8': case '9':
1392       result->type = CPP_NUMBER;
1393       if (CPP_OPTION(pfile, pedantic_parse_number))
1394         pedantic_parse_number (pfile, &result->val.str, 0);
1395       else
1396         parse_number (pfile, &result->val.str, 0);
1397       break;
1398
1399     case 'L':
1400       /* 'L' may introduce wide characters or strings.  */
1401       {
1402         const unsigned char *pos = buffer->cur;
1403
1404         c = get_effective_char (pfile);
1405         if (c == '\'' || c == '"')
1406           {
1407             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1408             parse_string (pfile, result, c);
1409             break;
1410           }
             /* Not a wide literal: undo the lookahead and lex the 'L' as
                the start of an identifier.  */
1411         buffer->cur = pos;
1412       }
1413       /* Fall through.  */
1414
1415     start_ident:
1416     case '_':
1417     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1418     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1419     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1420     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1421     case 'y': case 'z':
1422     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1423     case 'G': case 'H': case 'I': case 'J': case 'K':
1424     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1425     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1426     case 'Y': case 'Z':
1427       result->type = CPP_NAME;
1428       result->val.node = parse_identifier (pfile);
1429
1430       /* SDCC _asm specific */
1431       /* handle _asm ... _endasm ;  */
1432       if (CPP_OPTION(pfile, preproc_asm) == 0 && result->val.node == pfile->spec_nodes.n__asm)
1433         {
               /* The saved text starts right after the identifier.  */
1434           comment_start = buffer->cur;
1435           result->type = CPP_ASM;
1436           skip_asm_block (pfile);
1437           /* Save the _asm block as a token in its own right.  */
1438           save_asm (pfile, result, comment_start);
1439         }
1440       /* Convert named operators to their proper types.  */
1441       else if (result->val.node->flags & NODE_OPERATOR)
1442         {
1443           result->flags |= NAMED_OP;
1444           result->type = result->val.node->value.operator;
1445         }
1446       break;
1447
1448     case '\'':
1449     case '"':
1450       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1451       parse_string (pfile, result, c);
1452       break;
1453
1454     case '/':
1455       /* A potential block or line comment.  */
1456       comment_start = buffer->cur;
1457       c = get_effective_char (pfile);
1458
1459       if (c == '*')
1460         {
1461           if (skip_block_comment (pfile))
1462             cpp_error (pfile, DL_ERROR, "unterminated comment");
1463         }
1464       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1465                             || CPP_IN_SYSTEM_HEADER (pfile)))
1466         {
1467           /* Warn about comments only if pedantically GNUC89, and not
1468              in system headers.  */
1469           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1470               && ! buffer->warned_cplusplus_comments)
1471             {
1472               cpp_error (pfile, DL_PEDWARN,
1473                          "C++ style comments are not allowed in ISO C90");
1474               cpp_error (pfile, DL_PEDWARN,
1475                          "(this will be reported only once per input file)");
1476               buffer->warned_cplusplus_comments = 1;
1477             }
1478
1479           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1480             cpp_error (pfile, DL_WARNING, "multi-line comment");
1481         }
1482       else if (c == '=')
1483         {
1484           result->type = CPP_DIV_EQ;
1485           break;
1486         }
1487       else
1488         {
1489           BACKUP ();
1490           result->type = CPP_DIV;
1491           break;
1492         }
1493
           /* Only the two comment branches get here; the operator
              branches above have already left the switch.  */
1494       if (!pfile->state.save_comments)
1495         {
1496           result->flags |= PREV_WHITE;
1497           goto update_tokens_line;
1498         }
1499
1500       /* Save the comment as a token in its own right.  */
1501       save_comment (pfile, result, comment_start, c);
1502       break;
1503
1504     case '<':
1505       if (pfile->state.angled_headers)
1506         {
1507           result->type = CPP_HEADER_NAME;
1508           parse_string (pfile, result, '>');
1509           break;
1510         }
1511
1512       c = get_effective_char (pfile);
1513       if (c == '=')
1514         result->type = CPP_LESS_EQ;
1515       else if (c == '<')
1516         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1517       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1518         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1519       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1520         {
1521           result->type = CPP_OPEN_SQUARE;
1522           result->flags |= DIGRAPH;
1523         }
1524       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1525         {
1526           result->type = CPP_OPEN_BRACE;
1527           result->flags |= DIGRAPH;
1528         }
1529       else
1530         {
1531           BACKUP ();
1532           result->type = CPP_LESS;
1533         }
1534       break;
1535
1536     case '>':
1537       c = get_effective_char (pfile);
1538       if (c == '=')
1539         result->type = CPP_GREATER_EQ;
1540       else if (c == '>')
1541         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1542       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1543         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1544       else
1545         {
1546           BACKUP ();
1547           result->type = CPP_GREATER;
1548         }
1549       break;
1550
1551     case '%':
1552       c = get_effective_char (pfile);
1553       if (c == '=')
1554         result->type = CPP_MOD_EQ;
1555       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1556         {
1557           result->flags |= DIGRAPH;
1558           result->type = CPP_HASH;
1559           if (get_effective_char (pfile) == '%')
1560             {
1561               const unsigned char *pos = buffer->cur;
1562
1563               if (get_effective_char (pfile) == ':')
1564                 result->type = CPP_PASTE;
1565               else
                     /* "%:%" without a following ':': go back to just
                        before the second '%' so that only "%:" is
                        consumed (this assumes that '%' took a single
                        source character).  */
1566                 buffer->cur = pos - 1;
1567             }
1568           else
1569             BACKUP ();
1570         }
1571       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1572         {
1573           result->flags |= DIGRAPH;
1574           result->type = CPP_CLOSE_BRACE;
1575         }
1576       else
1577         {
1578           BACKUP ();
1579           result->type = CPP_MOD;
1580         }
1581       break;
1582
1583     case '.':
1584       result->type = CPP_DOT;
1585       c = get_effective_char (pfile);
1586       if (c == '.')
1587         {
1588           const unsigned char *pos = buffer->cur;
1589
1590           if (get_effective_char (pfile) == '.')
1591             result->type = CPP_ELLIPSIS;
1592           else
                 /* Only two dots: go back to just before the second one
                    and return a single CPP_DOT (same single-character
                    assumption as for "%:%" above).  */
1593             buffer->cur = pos - 1;
1594         }
1595       /* All known character sets have 0...9 contiguous.  */
1596       else if (ISDIGIT (c))
1597         {
1598           result->type = CPP_NUMBER;
1599           if (CPP_OPTION(pfile, pedantic_parse_number))
1600             pedantic_parse_number (pfile, &result->val.str, 1);
1601           else
1602             parse_number (pfile, &result->val.str, 1);
1603         }
1604       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1605         result->type = CPP_DOT_STAR;
1606       else
1607         BACKUP ();
1608       break;
1609
1610     case '+':
1611       c = get_effective_char (pfile);
1612       if (c == '+')
1613         result->type = CPP_PLUS_PLUS;
1614       else if (c == '=')
1615         result->type = CPP_PLUS_EQ;
1616       else
1617         {
1618           BACKUP ();
1619           result->type = CPP_PLUS;
1620         }
1621       break;
1622
1623     case '-':
1624       c = get_effective_char (pfile);
1625       if (c == '>')
1626         {
1627           result->type = CPP_DEREF;
1628           if (CPP_OPTION (pfile, cplusplus))
1629             {
1630               if (get_effective_char (pfile) == '*')
1631                 result->type = CPP_DEREF_STAR;
1632               else
1633                 BACKUP ();
1634             }
1635         }
1636       else if (c == '-')
1637         result->type = CPP_MINUS_MINUS;
1638       else if (c == '=')
1639         result->type = CPP_MINUS_EQ;
1640       else
1641         {
1642           BACKUP ();
1643           result->type = CPP_MINUS;
1644         }
1645       break;
1646
1647     case '&':
1648       c = get_effective_char (pfile);
1649       if (c == '&')
1650         result->type = CPP_AND_AND;
1651       else if (c == '=')
1652         result->type = CPP_AND_EQ;
1653       else
1654         {
1655           BACKUP ();
1656           result->type = CPP_AND;
1657         }
1658       break;
1659
1660     case '|':
1661       c = get_effective_char (pfile);
1662       if (c == '|')
1663         result->type = CPP_OR_OR;
1664       else if (c == '=')
1665         result->type = CPP_OR_EQ;
1666       else
1667         {
1668           BACKUP ();
1669           result->type = CPP_OR;
1670         }
1671       break;
1672
1673     case ':':
1674       c = get_effective_char (pfile);
1675       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1676         result->type = CPP_SCOPE;
1677       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1678         {
1679           result->flags |= DIGRAPH;
1680           result->type = CPP_CLOSE_SQUARE;
1681         }
1682       else
1683         {
1684           BACKUP ();
1685           result->type = CPP_COLON;
1686         }
1687       break;
1688
1689     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1690     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1691     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1692     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1693     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1694
1695     case '~': result->type = CPP_COMPL; break;
1696     case ',': result->type = CPP_COMMA; break;
1697     case '(': result->type = CPP_OPEN_PAREN; break;
1698     case ')': result->type = CPP_CLOSE_PAREN; break;
1699     case '[': result->type = CPP_OPEN_SQUARE; break;
1700     case ']': result->type = CPP_CLOSE_SQUARE; break;
1701     case '{': result->type = CPP_OPEN_BRACE; break;
1702     case '}': result->type = CPP_CLOSE_BRACE; break;
1703     case ';': result->type = CPP_SEMICOLON; break;
1704
1705       /* @ is a punctuator in Objective-C.  */
1706     case '@': result->type = CPP_ATSIGN; break;
1707
1708     case '$':
1709       if (CPP_OPTION (pfile, dollars_in_ident))
1710         goto start_ident;
1711       /* Fall through...  */
1712
         /* Any other character becomes a CPP_OTHER token that carries
            the character itself.  */
1713     random_char:
1714     default:
1715       result->type = CPP_OTHER;
1716       result->val.c = c;
1717       break;
1718     }
1719
1720   return result;
1721 }
1722
1723 /* An upper bound on the number of bytes needed to spell TOKEN,
1724    including preceding whitespace.  */
1725 unsigned int
1726 cpp_token_len (token)
1727      const cpp_token *token;
1728 {
1729   unsigned int len;
1730
1731   switch (TOKEN_SPELL (token))
1732     {
1733     default:            len = 0;                                break;
1734     case SPELL_NUMBER:
1735     case SPELL_STRING:  len = token->val.str.len;               break;
1736     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1737     }
1738   /* 1 for whitespace, 4 for comment delimiters.  */
1739   return len + 5;
1740 }
1741
1742 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1743    already contain the enough space to hold the token's spelling.
1744    Returns a pointer to the character after the last character
1745    written.  */
1746 unsigned char *
1747 cpp_spell_token (pfile, token, buffer)
1748      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1749      const cpp_token *token;
1750      unsigned char *buffer;
1751 {
1752   switch (TOKEN_SPELL (token))
1753     {
1754     case SPELL_OPERATOR:
1755       {
1756         const unsigned char *spelling;
1757         unsigned char c;
1758
1759         if (token->flags & DIGRAPH)
1760           spelling
1761             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1762         else if (token->flags & NAMED_OP)
1763           goto spell_ident;
1764         else
1765           spelling = TOKEN_NAME (token);
1766
1767         while ((c = *spelling++) != '\0')
1768           *buffer++ = c;
1769       }
1770       break;
1771
1772     case SPELL_CHAR:
1773       *buffer++ = token->val.c;
1774       break;
1775
1776     spell_ident:
1777     case SPELL_IDENT:
1778       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1779       buffer += NODE_LEN (token->val.node);
1780       break;
1781
1782     case SPELL_NUMBER:
1783       memcpy (buffer, token->val.str.text, token->val.str.len);
1784       buffer += token->val.str.len;
1785       break;
1786
1787     case SPELL_STRING:
1788       {
1789         int left, right, tag;
1790         switch (token->type)
1791           {
1792           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1793           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1794           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1795           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1796           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1797           default:
1798             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1799                        TOKEN_NAME (token));
1800             return buffer;
1801           }
1802         if (tag) *buffer++ = tag;
1803         *buffer++ = left;
1804         memcpy (buffer, token->val.str.text, token->val.str.len);
1805         buffer += token->val.str.len;
1806         *buffer++ = right;
1807       }
1808       break;
1809
1810     case SPELL_NONE:
1811       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1812       break;
1813     }
1814
1815   return buffer;
1816 }
1817
1818 /* Returns TOKEN spelt as a null-terminated string.  The string is
1819    freed when the reader is destroyed.  Useful for diagnostics.  */
1820 unsigned char *
1821 cpp_token_as_text (pfile, token)
1822      cpp_reader *pfile;
1823      const cpp_token *token;
1824 {
1825   unsigned int len = cpp_token_len (token);
1826   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1827
1828   end = cpp_spell_token (pfile, token, start);
1829   end[0] = '\0';
1830
1831   return start;
1832 }
1833
1834 /* Used by C front ends, which really should move to using
1835    cpp_token_as_text.  */
1836 const char *
1837 cpp_type2name (type)
1838      enum cpp_ttype type;
1839 {
1840   return (const char *) token_spellings[type].name;
1841 }
1842
1843 /* Writes the spelling of token to FP, without any preceding space.
1844    Separated from cpp_spell_token for efficiency - to avoid stdio
1845    double-buffering.  */
1846 void
1847 cpp_output_token (token, fp)
1848      const cpp_token *token;
1849      FILE *fp;
1850 {
1851   switch (TOKEN_SPELL (token))
1852     {
1853     case SPELL_OPERATOR:
1854       {
1855         const unsigned char *spelling;
1856         int c;
1857
1858         if (token->flags & DIGRAPH)
1859           spelling
1860             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1861         else if (token->flags & NAMED_OP)
1862           goto spell_ident;
1863         else
1864           spelling = TOKEN_NAME (token);
1865
1866         c = *spelling;
1867         do
1868           putc (c, fp);
1869         while ((c = *++spelling) != '\0');
1870       }
1871       break;
1872
1873     case SPELL_CHAR:
1874       putc (token->val.c, fp);
1875       break;
1876
1877     spell_ident:
1878     case SPELL_IDENT:
1879       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1880     break;
1881
1882     case SPELL_NUMBER:
1883       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1884       break;
1885
1886     case SPELL_STRING:
1887       {
1888         int left, right, tag;
1889         switch (token->type)
1890           {
1891           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1892           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1893           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1894           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1895           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1896           case CPP_ASM:         left = '\0'; right = '\0'; tag = '\0'; break;
1897           default:
1898             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1899             return;
1900           }
1901         if (tag) putc (tag, fp);
1902         if (left) putc (left, fp);
1903         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1904         if (right) putc (right, fp);
1905       }
1906       break;
1907
1908     case SPELL_NONE:
1909       /* An error, most probably.  */
1910       break;
1911     }
1912 }
1913
1914 /* Compare two tokens.  */
1915 int
1916 _cpp_equiv_tokens (a, b)
1917      const cpp_token *a, *b;
1918 {
1919   if (a->type == b->type && a->flags == b->flags)
1920     switch (TOKEN_SPELL (a))
1921       {
1922       default:                  /* Keep compiler happy.  */
1923       case SPELL_OPERATOR:
1924         return 1;
1925       case SPELL_CHAR:
1926         return a->val.c == b->val.c; /* Character.  */
1927       case SPELL_NONE:
1928         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1929       case SPELL_IDENT:
1930         return a->val.node == b->val.node;
1931       case SPELL_NUMBER:
1932       case SPELL_STRING:
1933         return (a->val.str.len == b->val.str.len
1934                 && !memcmp (a->val.str.text, b->val.str.text,
1935                             a->val.str.len));
1936       }
1937
1938   return 0;
1939 }
1940
1941 /* Returns nonzero if a space should be inserted to avoid an
1942    accidental token paste for output.  For simplicity, it is
1943    conservative, and occasionally advises a space where one is not
1944    needed, e.g. "." and ".2".  */
1945 int
1946 cpp_avoid_paste (pfile, token1, token2)
1947      cpp_reader *pfile;
1948      const cpp_token *token1, *token2;
1949 {
1950   enum cpp_ttype a = token1->type, b = token2->type;
1951   cppchar_t c;
1952
1953   if (token1->flags & NAMED_OP)
1954     a = CPP_NAME;
1955   if (token2->flags & NAMED_OP)
1956     b = CPP_NAME;
1957
1958   c = EOF;
1959   if (token2->flags & DIGRAPH)
1960     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1961   else if (token_spellings[b].category == SPELL_OPERATOR)
1962     c = token_spellings[b].name[0];
1963
1964   /* Quickly get everything that can paste with an '='.  */
1965   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1966     return 1;
1967
1968   switch (a)
1969     {
1970     case CPP_GREATER:   return c == '>' || c == '?';
1971     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1972     case CPP_PLUS:      return c == '+';
1973     case CPP_MINUS:     return c == '-' || c == '>';
1974     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1975     case CPP_MOD:       return c == ':' || c == '>';
1976     case CPP_AND:       return c == '&';
1977     case CPP_OR:        return c == '|';
1978     case CPP_COLON:     return c == ':' || c == '>';
1979     case CPP_DEREF:     return c == '*';
1980     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1981     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1982     case CPP_NAME:      return ((b == CPP_NUMBER
1983                                  && name_p (pfile, &token2->val.str))
1984                                 || b == CPP_NAME
1985                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1986     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1987                                 || c == '.' || c == '+' || c == '-');
1988     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1989                                 && token1->val.c == '@'
1990                                 && (b == CPP_NAME || b == CPP_STRING));
1991     default:            break;
1992     }
1993
1994   return 0;
1995 }
1996
1997 /* Output all the remaining tokens on the current line, and a newline
1998    character, to FP.  Leading whitespace is removed.  If there are
1999    macros, special token padding is not performed.  */
2000 void
2001 cpp_output_line (pfile, fp)
2002      cpp_reader *pfile;
2003      FILE *fp;
2004 {
2005   const cpp_token *token;
2006
2007   token = cpp_get_token (pfile);
2008   while (token->type != CPP_EOF)
2009     {
2010       cpp_output_token (token, fp);
2011       token = cpp_get_token (pfile);
2012       if (token->flags & PREV_WHITE)
2013         putc (' ', fp);
2014     }
2015
2016   putc ('\n', fp);
2017 }
2018
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
2029
2030 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
2031    failure if cpplib is not parsing C++ or C99.  Such failure is
2032    silent, and no variables are updated.  Otherwise returns 0, and
2033    warns if -Wtraditional.
2034
2035    [lex.charset]: The character designated by the universal character
2036    name \UNNNNNNNN is that character whose character short name in
2037    ISO/IEC 10646 is NNNNNNNN; the character designated by the
2038    universal character name \uNNNN is that character whose character
2039    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
2040    for a universal character name is less than 0x20 or in the range
2041    0x7F-0x9F (inclusive), or if the universal character name
2042    designates a character in the basic source character set, then the
2043    program is ill-formed.
2044
2045    We assume that wchar_t is Unicode, so we don't need to do any
2046    mapping.  Is this ever wrong?
2047
2048    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
2049    LIMIT is the end of the string or charconst.  PSTR is updated to
2050    point after the UCS on return, and the UCS is written into PC.  */
2051
2052 static int
2053 maybe_read_ucs (pfile, pstr, limit, pc)
2054      cpp_reader *pfile;
2055      const unsigned char **pstr;
2056      const unsigned char *limit;
2057      cppchar_t *pc;
2058 {
2059   const unsigned char *p = *pstr;
2060   unsigned int code = 0;
2061   unsigned int c = *pc, length;
2062
2063   /* Only attempt to interpret a UCS for C++ and C99.  */
2064   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
2065     return 1;
2066
2067   if (CPP_WTRADITIONAL (pfile))
2068     cpp_error (pfile, DL_WARNING,
2069                "the meaning of '\\%c' is different in traditional C", c);
2070
2071   length = (c == 'u' ? 4: 8);
2072
2073   if ((size_t) (limit - p) < length)
2074     {
2075       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
2076       /* Skip to the end to avoid more diagnostics.  */
2077       p = limit;
2078     }
2079   else
2080     {
2081       for (; length; length--, p++)
2082         {
2083           c = *p;
2084           if (ISXDIGIT (c))
2085             code = (code << 4) + hex_digit_value (c);
2086           else
2087             {
2088               cpp_error (pfile, DL_ERROR,
2089                          "non-hex digit '%c' in universal-character-name", c);
2090               /* We shouldn't skip in case there are multibyte chars.  */
2091               break;
2092             }
2093         }
2094     }
2095
2096 #ifdef TARGET_EBCDIC
2097   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
2098   code = 0x3f;  /* EBCDIC invalid character */
2099 #else
2100  /* True extended characters are OK.  */
2101   if (code >= 0xa0
2102       && !(code & 0x80000000)
2103       && !(code >= 0xD800 && code <= 0xDFFF))
2104     ;
2105   /* The standard permits $, @ and ` to be specified as UCNs.  We use
2106      hex escapes so that this also works with EBCDIC hosts.  */
2107   else if (code == 0x24 || code == 0x40 || code == 0x60)
2108     ;
2109   /* Don't give another error if one occurred above.  */
2110   else if (length == 0)
2111     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
2112 #endif
2113
2114   *pstr = p;
2115   *pc = code;
2116   return 0;
2117 }
2118
2119 /* Returns the value of an escape sequence, truncated to the correct
2120    target precision.  PSTR points to the input pointer, which is just
2121    after the backslash.  LIMIT is how much text we have.  WIDE is true
2122    if the escape sequence is part of a wide character constant or
2123    string literal.  Handles all relevant diagnostics.  */
2124 cppchar_t
2125 cpp_parse_escape (pfile, pstr, limit, wide)
2126      cpp_reader *pfile;
2127      const unsigned char **pstr;
2128      const unsigned char *limit;
2129      int wide;
2130 {
2131   int unknown = 0;
2132   const unsigned char *str = *pstr;
2133   cppchar_t c, mask;
2134   unsigned int width;
2135
2136   if (wide)
2137     width = CPP_OPTION (pfile, wchar_precision);
2138   else
2139     width = CPP_OPTION (pfile, char_precision);
2140   if (width < BITS_PER_CPPCHAR_T)
2141     mask = ((cppchar_t) 1 << width) - 1;
2142   else
2143     mask = ~0;
2144
2145   c = *str++;
2146   switch (c)
2147     {
2148     case '\\': case '\'': case '"': case '?': break;
2149     case 'b': c = TARGET_BS;      break;
2150     case 'f': c = TARGET_FF;      break;
2151     case 'n': c = TARGET_NEWLINE; break;
2152     case 'r': c = TARGET_CR;      break;
2153     case 't': c = TARGET_TAB;     break;
2154     case 'v': c = TARGET_VT;      break;
2155
2156     case '(': case '{': case '[': case '%':
2157       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
2158          '\%' is used to prevent SCCS from getting confused.  */
2159       unknown = CPP_PEDANTIC (pfile);
2160       break;
2161
2162     case 'a':
2163       if (CPP_WTRADITIONAL (pfile))
2164         cpp_error (pfile, DL_WARNING,
2165                    "the meaning of '\\a' is different in traditional C");
2166       c = TARGET_BELL;
2167       break;
2168
2169     case 'e': case 'E':
2170       if (CPP_PEDANTIC (pfile))
2171         cpp_error (pfile, DL_PEDWARN,
2172                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
2173       c = TARGET_ESC;
2174       break;
2175
2176     case 'u': case 'U':
2177       unknown = maybe_read_ucs (pfile, &str, limit, &c);
2178       break;
2179
2180     case 'x':
2181       if (CPP_WTRADITIONAL (pfile))
2182         cpp_error (pfile, DL_WARNING,
2183                    "the meaning of '\\x' is different in traditional C");
2184
2185       {
2186         cppchar_t i = 0, overflow = 0;
2187         int digits_found = 0;
2188
2189         while (str < limit)
2190           {
2191             c = *str;
2192             if (! ISXDIGIT (c))
2193               break;
2194             str++;
2195             overflow |= i ^ (i << 4 >> 4);
2196             i = (i << 4) + hex_digit_value (c);
2197             digits_found = 1;
2198           }
2199
2200         if (!digits_found)
2201           cpp_error (pfile, DL_ERROR,
2202                        "\\x used with no following hex digits");
2203
2204         if (overflow | (i != (i & mask)))
2205           {
2206             cpp_error (pfile, DL_PEDWARN,
2207                        "hex escape sequence out of range");
2208             i &= mask;
2209           }
2210         c = i;
2211       }
2212       break;
2213
2214     case '0':  case '1':  case '2':  case '3':
2215     case '4':  case '5':  case '6':  case '7':
2216       {
2217         size_t count = 0;
2218         cppchar_t i = c - '0';
2219
2220         while (str < limit && ++count < 3)
2221           {
2222             c = *str;
2223             if (c < '0' || c > '7')
2224               break;
2225             str++;
2226             i = (i << 3) + c - '0';
2227           }
2228
2229         if (i != (i & mask))
2230           {
2231             cpp_error (pfile, DL_PEDWARN,
2232                        "octal escape sequence out of range");
2233             i &= mask;
2234           }
2235         c = i;
2236       }
2237       break;
2238
2239     default:
2240       unknown = 1;
2241       break;
2242     }
2243
2244   if (unknown)
2245     {
2246       if (ISGRAPH (c))
2247         cpp_error (pfile, DL_PEDWARN,
2248                    "unknown escape sequence '\\%c'", (int) c);
2249       else
2250         cpp_error (pfile, DL_PEDWARN,
2251                    "unknown escape sequence: '\\%03o'", (int) c);
2252     }
2253
2254   if (c > mask)
2255     {
2256       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
2257       c &= mask;
2258     }
2259
2260   *pstr = str;
2261   return c;
2262 }
2263
2264 /* Interpret a (possibly wide) character constant in TOKEN.
2265    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
2266    points to a variable that is filled in with the number of
2267    characters seen, and UNSIGNEDP to a variable that indicates whether
2268    the result has signed type.  */
2269 cppchar_t
2270 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
2271      cpp_reader *pfile;
2272      const cpp_token *token;
2273      unsigned int *pchars_seen;
2274      int *unsignedp;
2275 {
2276   const unsigned char *str = token->val.str.text;
2277   const unsigned char *limit = str + token->val.str.len;
2278   unsigned int chars_seen = 0;
2279   size_t width, max_chars;
2280   cppchar_t c, mask, result = 0;
2281   bool unsigned_p;
2282
2283 #ifdef MULTIBYTE_CHARS
2284   (void) local_mbtowc (NULL, NULL, 0);
2285 #endif
2286
2287   /* Width in bits.  */
2288   if (token->type == CPP_CHAR)
2289     {
2290       width = CPP_OPTION (pfile, char_precision);
2291       max_chars = CPP_OPTION (pfile, int_precision) / width;
2292       unsigned_p = CPP_OPTION (pfile, unsigned_char);
2293     }
2294   else
2295     {
2296       width = CPP_OPTION (pfile, wchar_precision);
2297       max_chars = 1;
2298       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
2299     }
2300
2301   if (width < BITS_PER_CPPCHAR_T)
2302     mask = ((cppchar_t) 1 << width) - 1;
2303   else
2304     mask = ~0;
2305
2306   while (str < limit)
2307     {
2308 #ifdef MULTIBYTE_CHARS
2309       wchar_t wc;
2310       int char_len;
2311
2312       char_len = local_mbtowc (&wc, str, limit - str);
2313       if (char_len == -1)
2314         {
2315           cpp_error (pfile, DL_WARNING,
2316                      "ignoring invalid multibyte character");
2317           c = *str++;
2318         }
2319       else
2320         {
2321           str += char_len;
2322           c = wc;
2323         }
2324 #else
2325       c = *str++;
2326 #endif
2327
2328       if (c == '\\')
2329         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
2330
2331 #ifdef MAP_CHARACTER
2332       if (ISPRINT (c))
2333         c = MAP_CHARACTER (c);
2334 #endif
2335
2336       chars_seen++;
2337
2338       /* Truncate the character, scale the result and merge the two.  */
2339       c &= mask;
2340       if (width < BITS_PER_CPPCHAR_T)
2341         result = (result << width) | c;
2342       else
2343         result = c;
2344     }
2345
2346   if (chars_seen == 0)
2347     cpp_error (pfile, DL_ERROR, "empty character constant");
2348   else if (chars_seen > 1)
2349     {
2350       /* Multichar charconsts are of type int and therefore signed.  */
2351       unsigned_p = 0;
2352
2353       if (chars_seen > max_chars)
2354         {
2355           chars_seen = max_chars;
2356           cpp_error (pfile, DL_WARNING,
2357                      "character constant too long for its type");
2358         }
2359       else if (CPP_OPTION (pfile, warn_multichar))
2360         cpp_error (pfile, DL_WARNING, "multi-character character constant");
2361     }
2362
2363   /* Sign-extend or truncate the constant to cppchar_t.  The value is
2364      in WIDTH bits, but for multi-char charconsts it's value is the
2365      full target type's width.  */
2366   if (chars_seen > 1)
2367     width *= max_chars;
2368   if (width < BITS_PER_CPPCHAR_T)
2369     {
2370       mask = ((cppchar_t) 1 << width) - 1;
2371       if (unsigned_p || !(result & (1 << (width - 1))))
2372         result &= mask;
2373       else
2374         result |= ~mask;
2375     }
2376
2377   *pchars_seen = chars_seen;
2378   *unsignedp = unsigned_p;
2379   return result;
2380 }
2381
/* Memory buffers.  Changing these three definitions can have a
   dramatic effect on performance.  The values here are reasonable
   defaults, but might be tuned.  If you adjust them, be sure to test
   across a range of uses of cpplib, including heavy nested
   function-like macro expansion.  Also check the change in peak
   memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still accepted for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   pointer to its limit.  Every macro argument is parenthesized so an
   expression argument cannot regroup with the surrounding
   arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2396
2397 /* Create a new allocation buffer.  Place the control block at the end
2398    of the buffer, so that buffer overflows will cause immediate chaos.  */
2399 static _cpp_buff *
2400 new_buff (len)
2401      size_t len;
2402 {
2403   _cpp_buff *result;
2404   unsigned char *base;
2405
2406   if (len < MIN_BUFF_SIZE)
2407     len = MIN_BUFF_SIZE;
2408   len = CPP_ALIGN (len);
2409
2410   base = xmalloc (len + sizeof (_cpp_buff));
2411   result = (_cpp_buff *) (base + len);
2412   result->base = base;
2413   result->cur = base;
2414   result->limit = base + len;
2415   result->next = NULL;
2416   return result;
2417 }
2418
2419 /* Place a chain of unwanted allocation buffers on the free list.  */
2420 void
2421 _cpp_release_buff (pfile, buff)
2422      cpp_reader *pfile;
2423      _cpp_buff *buff;
2424 {
2425   _cpp_buff *end = buff;
2426
2427   while (end->next)
2428     end = end->next;
2429   end->next = pfile->free_buffs;
2430   pfile->free_buffs = buff;
2431 }
2432
2433 /* Return a free buffer of size at least MIN_SIZE.  */
2434 _cpp_buff *
2435 _cpp_get_buff (pfile, min_size)
2436      cpp_reader *pfile;
2437      size_t min_size;
2438 {
2439   _cpp_buff *result, **p;
2440
2441   for (p = &pfile->free_buffs;; p = &(*p)->next)
2442     {
2443       size_t size;
2444
2445       if (*p == NULL)
2446         return new_buff (min_size);
2447       result = *p;
2448       size = result->limit - result->base;
2449       /* Return a buffer that's big enough, but don't waste one that's
2450          way too big.  */
2451       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2452         break;
2453     }
2454
2455   *p = result->next;
2456   result->next = NULL;
2457   result->cur = result->base;
2458   return result;
2459 }
2460
2461 /* Creates a new buffer with enough space to hold the uncommitted
2462    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2463    the excess bytes to the new buffer.  Chains the new buffer after
2464    BUFF, and returns the new buffer.  */
2465 _cpp_buff *
2466 _cpp_append_extend_buff (pfile, buff, min_extra)
2467      cpp_reader *pfile;
2468      _cpp_buff *buff;
2469      size_t min_extra;
2470 {
2471   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2472   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2473
2474   buff->next = new_buff;
2475   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2476   return new_buff;
2477 }
2478
2479 /* Creates a new buffer with enough space to hold the uncommitted
2480    remaining bytes of the buffer pointed to by BUFF, and at least
2481    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2482    Chains the new buffer before the buffer pointed to by BUFF, and
2483    updates the pointer to point to the new buffer.  */
2484 void
2485 _cpp_extend_buff (pfile, pbuff, min_extra)
2486      cpp_reader *pfile;
2487      _cpp_buff **pbuff;
2488      size_t min_extra;
2489 {
2490   _cpp_buff *new_buff, *old_buff = *pbuff;
2491   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2492
2493   new_buff = _cpp_get_buff (pfile, size);
2494   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2495   new_buff->next = old_buff;
2496   *pbuff = new_buff;
2497 }
2498
2499 /* Free a chain of buffers starting at BUFF.  */
2500 void
2501 _cpp_free_buff (buff)
2502      _cpp_buff *buff;
2503 {
2504   _cpp_buff *next;
2505
2506   for (; buff; buff = next)
2507     {
2508       next = buff->next;
2509       free (buff->base);
2510     }
2511 }
2512
2513 /* Allocate permanent, unaligned storage of length LEN.  */
2514 unsigned char *
2515 _cpp_unaligned_alloc (pfile, len)
2516      cpp_reader *pfile;
2517      size_t len;
2518 {
2519   _cpp_buff *buff = pfile->u_buff;
2520   unsigned char *result = buff->cur;
2521
2522   if (len > (size_t) (buff->limit - result))
2523     {
2524       buff = _cpp_get_buff (pfile, len);
2525       buff->next = pfile->u_buff;
2526       pfile->u_buff = buff;
2527       result = buff->cur;
2528     }
2529
2530   buff->cur = result + len;
2531   return result;
2532 }
2533
2534 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2535    That buffer is used for growing allocations when saving macro
2536    replacement lists in a #define, and when parsing an answer to an
2537    assertion in #assert, #unassert or #if (and therefore possibly
2538    whilst expanding macros).  It therefore must not be used by any
2539    code that they might call: specifically the lexer and the guts of
2540    the macro expander.
2541
2542    All existing other uses clearly fit this restriction: storing
2543    registered pragmas during initialization.  */
2544 unsigned char *
2545 _cpp_aligned_alloc (pfile, len)
2546      cpp_reader *pfile;
2547      size_t len;
2548 {
2549   _cpp_buff *buff = pfile->a_buff;
2550   unsigned char *result = buff->cur;
2551
2552   if (len > (size_t) (buff->limit - result))
2553     {
2554       buff = _cpp_get_buff (pfile, len);
2555       buff->next = pfile->a_buff;
2556       pfile->a_buff = buff;
2557       result = buff->cur;
2558     }
2559
2560   buff->cur = result + len;
2561   return result;
2562 }