git.gag.com Git - debian/tar/blob - gnu/fnmatch_loop.c

   1 /* -*- buffer-read-only: t -*- vi: set ro: */
   2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
   3 /* Copyright (C) 1991-1993, 1996-2006, 2009-2013 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  18
  19 /* Match STRING against the file name pattern PATTERN, returning zero if
  20    it matches, nonzero if not.  This sentence describes FCT, which is
        defined right after the two forward declarations below.

        EXT is called by FCT when FNM_EXTMATCH is set in FLAGS and a '?' or
        '*' in the pattern is immediately followed by '('; the wildcard
        character is passed as OPT.  FCT returns EXT's result as its own
        unless that result is -1, in which case FCT goes on to treat the
        character as an ordinary wildcard.

        END is called by FCT, while it skips the wildcards that follow a
        '*', on a pattern position that points at '('.  If the returned
        pointer differs from PATTERNP, FCT treats the text as an extended
        pattern and resumes scanning the pattern at the returned pointer;
        if it is equal, FCT skips nothing.  */
  21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  22                 const CHAR *string_end, bool no_leading_period, int flags)
  23      internal_function;
  24 static const CHAR *END (const CHAR *patternp) internal_function;
  25
  26 static int
  27 internal_function
  28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  29      bool no_leading_period, int flags)
  30 {
  31   register const CHAR *p = pattern, *n = string;
  32   register UCHAR c;
  33 #ifdef _LIBC
  34 # if WIDE_CHAR_VERSION
  35   const char *collseq = (const char *)
  36     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  37 # else
  38   const UCHAR *collseq = (const UCHAR *)
  39     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  40 # endif
  41 #endif
  42
  43   while ((c = *p++) != L_('\0'))
  44     {
  45       bool new_no_leading_period = false;
  46       c = FOLD (c);
  47
  48       switch (c)
  49         {
  50         case L_('?'):
  51           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  52             {
  53               int res;
  54
  55               res = EXT (c, p, n, string_end, no_leading_period,
  56                          flags);
  57               if (res != -1)
  58                 return res;
  59             }
  60
  61           if (n == string_end)
  62             return FNM_NOMATCH;
  63           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  64             return FNM_NOMATCH;
  65           else if (*n == L_('.') && no_leading_period)
  66             return FNM_NOMATCH;
  67           break;
  68
  69         case L_('\\'):
  70           if (!(flags & FNM_NOESCAPE))
  71             {
  72               c = *p++;
  73               if (c == L_('\0'))
  74                 /* Trailing \ loses.  */
  75                 return FNM_NOMATCH;
  76               c = FOLD (c);
  77             }
  78           if (n == string_end || FOLD ((UCHAR) *n) != c)
  79             return FNM_NOMATCH;
  80           break;
  81
  82         case L_('*'):
  83           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  84             {
  85               int res;
  86
  87               res = EXT (c, p, n, string_end, no_leading_period,
  88                          flags);
  89               if (res != -1)
  90                 return res;
  91             }
  92
  93           if (n != string_end && *n == L_('.') && no_leading_period)
  94             return FNM_NOMATCH;
  95
  96           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  97             {
  98               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
  99                 {
 100                   const CHAR *endp = END (p);
 101                   if (endp != p)
 102                     {
 103                       /* This is a pattern.  Skip over it.  */
 104                       p = endp;
 105                       continue;
 106                     }
 107                 }
 108
 109               if (c == L_('?'))
 110                 {
 111                   /* A ? needs to match one character.  */
 112                   if (n == string_end)
 113                     /* There isn't another character; no match.  */
 114                     return FNM_NOMATCH;
 115                   else if (*n == L_('/')
 116                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 117                     /* A slash does not match a wildcard under
 118                        FNM_FILE_NAME.  */
 119                     return FNM_NOMATCH;
 120                   else
 121                     /* One character of the string is consumed in matching
 122                        this ? wildcard, so *??? won't match if there are
 123                        less than three characters.  */
 124                     ++n;
 125                 }
 126             }
 127
 128           if (c == L_('\0'))
 129             /* The wildcard(s) is/are the last element of the pattern.
 130                If the name is a file name and contains another slash
 131                this means it cannot match, unless the FNM_LEADING_DIR
 132                flag is set.  */
 133             {
 134               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 135
 136               if (flags & FNM_FILE_NAME)
 137                 {
 138                   if (flags & FNM_LEADING_DIR)
 139                     result = 0;
 140                   else
 141                     {
 142                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 143                         result = 0;
 144                     }
 145                 }
 146
 147               return result;
 148             }
 149           else
 150             {
 151               const CHAR *endp;
 152
 153               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 154                              string_end - n);
 155               if (endp == NULL)
 156                 endp = string_end;
 157
 158               if (c == L_('[')
 159                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 160                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 161                       && *p == L_('(')))
 162                 {
 163                   int flags2 = ((flags & FNM_FILE_NAME)
 164                                 ? flags : (flags & ~FNM_PERIOD));
 165                   bool no_leading_period2 = no_leading_period;
 166
 167                   for (--p; n < endp; ++n, no_leading_period2 = false)
 168                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 169                         == 0)
 170                       return 0;
 171                 }
 172               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 173                 {
 174                   while (n < string_end && *n != L_('/'))
 175                     ++n;
 176                   if (n < string_end && *n == L_('/')
 177                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 178                           == 0))
 179                     return 0;
 180                 }
 181               else
 182                 {
 183                   int flags2 = ((flags & FNM_FILE_NAME)
 184                                 ? flags : (flags & ~FNM_PERIOD));
 185                   int no_leading_period2 = no_leading_period;
 186
 187                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 188                     c = *p;
 189                   c = FOLD (c);
 190                   for (--p; n < endp; ++n, no_leading_period2 = false)
 191                     if (FOLD ((UCHAR) *n) == c
 192                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 193                             == 0))
 194                       return 0;
 195                 }
 196             }
 197
 198           /* If we come here no match is possible with the wildcard.  */
 199           return FNM_NOMATCH;
 200
 201         case L_('['):
 202           {
 203             /* Nonzero if the sense of the character class is inverted.  */
 204             const CHAR *p_init = p;
 205             const CHAR *n_init = n;
 206             register bool not;
 207             CHAR cold;
 208             UCHAR fn;
 209
 210             if (posixly_correct == 0)
 211               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 212
 213             if (n == string_end)
 214               return FNM_NOMATCH;
 215
 216             if (*n == L_('.') && no_leading_period)
 217               return FNM_NOMATCH;
 218
 219             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 220               /* '/' cannot be matched.  */
 221               return FNM_NOMATCH;
 222
 223             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 224             if (not)
 225               ++p;
 226
 227             fn = FOLD ((UCHAR) *n);
 228
 229             c = *p++;
 230             for (;;)
 231               {
 232                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 233                   {
 234                     if (*p == L_('\0'))
 235                       return FNM_NOMATCH;
 236                     c = FOLD ((UCHAR) *p);
 237                     ++p;
 238
 239                     goto normal_bracket;
 240                   }
 241                 else if (c == L_('[') && *p == L_(':'))
 242                   {
 243                     /* Leave room for the null.  */
 244                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 245                     size_t c1 = 0;
 246 #if defined _LIBC || WIDE_CHAR_SUPPORT
 247                     wctype_t wt;
 248 #endif
 249                     const CHAR *startp = p;
 250
 251                     for (;;)
 252                       {
 253                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 254                           /* The name is too long and therefore the pattern
 255                              is ill-formed.  */
 256                           return FNM_NOMATCH;
 257
 258                         c = *++p;
 259                         if (c == L_(':') && p[1] == L_(']'))
 260                           {
 261                             p += 2;
 262                             break;
 263                           }
 264                         if (c < L_('a') || c >= L_('z'))
 265                           {
 266                             /* This cannot possibly be a character class name.
 267                                Match it as a normal range.  */
 268                             p = startp;
 269                             c = L_('[');
 270                             goto normal_bracket;
 271                           }
 272                         str[c1++] = c;
 273                       }
 274                     str[c1] = L_('\0');
 275
 276 #if defined _LIBC || WIDE_CHAR_SUPPORT
 277                     wt = IS_CHAR_CLASS (str);
 278                     if (wt == 0)
 279                       /* Invalid character class name.  */
 280                       return FNM_NOMATCH;
 281
 282 # if defined _LIBC && ! WIDE_CHAR_VERSION
 283                     /* The following code is glibc specific but does
 284                        there a good job in speeding up the code since
 285                        we can avoid the btowc() call.  */
 286                     if (_ISCTYPE ((UCHAR) *n, wt))
 287                       goto matched;
 288 # else
 289                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 290                       goto matched;
 291 # endif
 292 #else
 293                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
 294                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
 295                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
 296                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
 297                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
 298                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
 299                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
 300                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
 301                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
 302                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
 303                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
 304                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
 305                       goto matched;
 306 #endif
 307                     c = *p++;
 308                   }
 309 #ifdef _LIBC
 310                 else if (c == L_('[') && *p == L_('='))
 311                   {
 312                     UCHAR str[1];
 313                     uint32_t nrules =
 314                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 315                     const CHAR *startp = p;
 316
 317                     c = *++p;
 318                     if (c == L_('\0'))
 319                       {
 320                         p = startp;
 321                         c = L_('[');
 322                         goto normal_bracket;
 323                       }
 324                     str[0] = c;
 325
 326                     c = *++p;
 327                     if (c != L_('=') || p[1] != L_(']'))
 328                       {
 329                         p = startp;
 330                         c = L_('[');
 331                         goto normal_bracket;
 332                       }
 333                     p += 2;
 334
 335                     if (nrules == 0)
 336                       {
 337                         if ((UCHAR) *n == str[0])
 338                           goto matched;
 339                       }
 340                     else
 341                       {
 342                         const int32_t *table;
 343 # if WIDE_CHAR_VERSION
 344                         const int32_t *weights;
 345                         const int32_t *extra;
 346 # else
 347                         const unsigned char *weights;
 348                         const unsigned char *extra;
 349 # endif
 350                         const int32_t *indirect;
 351                         int32_t idx;
 352                         const UCHAR *cp = (const UCHAR *) str;
 353
 354                         /* This #include defines a local function!  */
 355 # if WIDE_CHAR_VERSION
 356 #  include <locale/weightwc.h>
 357 # else
 358 #  include <locale/weight.h>
 359 # endif
 360
 361 # if WIDE_CHAR_VERSION
 362                         table = (const int32_t *)
 363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 364                         weights = (const int32_t *)
 365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 366                         extra = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 368                         indirect = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 370 # else
 371                         table = (const int32_t *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 373                         weights = (const unsigned char *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 375                         extra = (const unsigned char *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 377                         indirect = (const int32_t *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 379 # endif
 380
 381                         idx = findidx (&cp);
 382                         if (idx != 0)
 383                           {
 384                             /* We found a table entry.  Now see whether the
 385                                character we are currently at has the same
 386                                equivalence class value.  */
 387                             int len = weights[idx & 0xffffff];
 388                             int32_t idx2;
 389                             const UCHAR *np = (const UCHAR *) n;
 390
 391                             idx2 = findidx (&np);
 392                             if (idx2 != 0
 393                                 && (idx >> 24) == (idx2 >> 24)
 394                                 && len == weights[idx2 & 0xffffff])
 395                               {
 396                                 int cnt = 0;
 397
 398                                 idx &= 0xffffff;
 399                                 idx2 &= 0xffffff;
 400
 401                                 while (cnt < len
 402                                        && (weights[idx + 1 + cnt]
 403                                            == weights[idx2 + 1 + cnt]))
 404                                   ++cnt;
 405
 406                                 if (cnt == len)
 407                                   goto matched;
 408                               }
 409                           }
 410                       }
 411
 412                     c = *p++;
 413                   }
 414 #endif
 415                 else if (c == L_('\0'))
 416                   {
 417                     /* [ unterminated, treat as normal character.  */
 418                     p = p_init;
 419                     n = n_init;
 420                     c = L_('[');
 421                     goto normal_match;
 422                   }
 423                 else
 424                   {
 425                     bool is_range = false;
 426
 427 #ifdef _LIBC
 428                     bool is_seqval = false;
 429
 430                     if (c == L_('[') && *p == L_('.'))
 431                       {
 432                         uint32_t nrules =
 433                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 434                         const CHAR *startp = p;
 435                         size_t c1 = 0;
 436
 437                         while (1)
 438                           {
 439                             c = *++p;
 440                             if (c == L_('.') && p[1] == L_(']'))
 441                               {
 442                                 p += 2;
 443                                 break;
 444                               }
 445                             if (c == '\0')
 446                               return FNM_NOMATCH;
 447                             ++c1;
 448                           }
 449
 450                         /* We have to handling the symbols differently in
 451                            ranges since then the collation sequence is
 452                            important.  */
 453                         is_range = *p == L_('-') && p[1] != L_('\0');
 454
 455                         if (nrules == 0)
 456                           {
 457                             /* There are no names defined in the collation
 458                                data.  Therefore we only accept the trivial
 459                                names consisting of the character itself.  */
 460                             if (c1 != 1)
 461                               return FNM_NOMATCH;
 462
 463                             if (!is_range && *n == startp[1])
 464                               goto matched;
 465
 466                             cold = startp[1];
 467                             c = *p++;
 468                           }
 469                         else
 470                           {
 471                             int32_t table_size;
 472                             const int32_t *symb_table;
 473 # ifdef WIDE_CHAR_VERSION
 474                             char str[c1];
 475                             size_t strcnt;
 476 # else
 477 #  define str (startp + 1)
 478 # endif
 479                             const unsigned char *extra;
 480                             int32_t idx;
 481                             int32_t elem;
 482                             int32_t second;
 483                             int32_t hash;
 484
 485 # ifdef WIDE_CHAR_VERSION
 486                             /* We have to convert the name to a single-byte
 487                                string.  This is possible since the names
 488                                consist of ASCII characters and the internal
 489                                representation is UCS4.  */
 490                             for (strcnt = 0; strcnt < c1; ++strcnt)
 491                               str[strcnt] = startp[1 + strcnt];
 492 # endif
 493
 494                             table_size =
 495                               _NL_CURRENT_WORD (LC_COLLATE,
 496                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 497                             symb_table = (const int32_t *)
 498                               _NL_CURRENT (LC_COLLATE,
 499                                            _NL_COLLATE_SYMB_TABLEMB);
 500                             extra = (const unsigned char *)
 501                               _NL_CURRENT (LC_COLLATE,
 502                                            _NL_COLLATE_SYMB_EXTRAMB);
 503
 504                             /* Locate the character in the hashing table.  */
 505                             hash = elem_hash (str, c1);
 506
 507                             idx = 0;
 508                             elem = hash % table_size;
 509                             if (symb_table[2 * elem] != 0)
 510                               {
 511                                 second = hash % (table_size - 2) + 1;
 512
 513                                 do
 514                                   {
 515                                     /* First compare the hashing value.  */
 516                                     if (symb_table[2 * elem] == hash
 517                                         && (c1
 518                                             == extra[symb_table[2 * elem + 1]])
 519                                         && memcmp (str,
 520                                                    &extra[symb_table[2 * elem
 521                                                                      + 1]
 522                                                           + 1], c1) == 0)
 523                                       {
 524                                         /* Yep, this is the entry.  */
 525                                         idx = symb_table[2 * elem + 1];
 526                                         idx += 1 + extra[idx];
 527                                         break;
 528                                       }
 529
 530                                     /* Next entry.  */
 531                                     elem += second;
 532                                   }
 533                                 while (symb_table[2 * elem] != 0);
 534                               }
 535
 536                             if (symb_table[2 * elem] != 0)
 537                               {
 538                                 /* Compare the byte sequence but only if
 539                                    this is not part of a range.  */
 540 # ifdef WIDE_CHAR_VERSION
 541                                 int32_t *wextra;
 542
 543                                 idx += 1 + extra[idx];
 544                                 /* Adjust for the alignment.  */
 545                                 idx = (idx + 3) & ~3;
 546
 547                                 wextra = (int32_t *) &extra[idx + 4];
 548 # endif
 549
 550                                 if (! is_range)
 551                                   {
 552 # ifdef WIDE_CHAR_VERSION
 553                                     for (c1 = 0;
 554                                          (int32_t) c1 < wextra[idx];
 555                                          ++c1)
 556                                       if (n[c1] != wextra[1 + c1])
 557                                         break;
 558
 559                                     if ((int32_t) c1 == wextra[idx])
 560                                       goto matched;
 561 # else
 562                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 563                                       if (n[c1] != extra[1 + c1])
 564                                         break;
 565
 566                                     if (c1 == extra[idx])
 567                                       goto matched;
 568 # endif
 569                                   }
 570
 571                                 /* Get the collation sequence value.  */
 572                                 is_seqval = true;
 573 # ifdef WIDE_CHAR_VERSION
 574                                 cold = wextra[1 + wextra[idx]];
 575 # else
 576                                 /* Adjust for the alignment.  */
 577                                 idx += 1 + extra[idx];
 578                                 idx = (idx + 3) & ~4;
 579                                 cold = *((int32_t *) &extra[idx]);
 580 # endif
 581
 582                                 c = *p++;
 583                               }
 584                             else if (c1 == 1)
 585                               {
 586                                 /* No valid character.  Match it as a
 587                                    single byte.  */
 588                                 if (!is_range && *n == str[0])
 589                                   goto matched;
 590
 591                                 cold = str[0];
 592                                 c = *p++;
 593                               }
 594                             else
 595                               return FNM_NOMATCH;
 596                           }
 597                       }
 598                     else
 599 # undef str
 600 #endif
 601                       {
 602                         c = FOLD (c);
 603                       normal_bracket:
 604
 605                         /* We have to handling the symbols differently in
 606                            ranges since then the collation sequence is
 607                            important.  */
 608                         is_range = (*p == L_('-') && p[1] != L_('\0')
 609                                     && p[1] != L_(']'));
 610
 611                         if (!is_range && c == fn)
 612                           goto matched;
 613
 614 #if _LIBC
 615                         /* This is needed if we goto normal_bracket; from
 616                            outside of is_seqval's scope.  */
 617                         is_seqval = false;
 618 #endif
 619
 620                         cold = c;
 621                         c = *p++;
 622                       }
 623
 624                     if (c == L_('-') && *p != L_(']'))
 625                       {
 626 #if _LIBC
 627                         /* We have to find the collation sequence
 628                            value for C.  Collation sequence is nothing
 629                            we can regularly access.  The sequence
 630                            value is defined by the order in which the
 631                            definitions of the collation values for the
 632                            various characters appear in the source
 633                            file.  A strange concept, nowhere
 634                            documented.  */
 635                         uint32_t fcollseq;
 636                         uint32_t lcollseq;
 637                         UCHAR cend = *p++;
 638
 639 # ifdef WIDE_CHAR_VERSION
 640                         /* Search in the 'names' array for the characters.  */
 641                         fcollseq = __collseq_table_lookup (collseq, fn);
 642                         if (fcollseq == ~((uint32_t) 0))
 643                           /* XXX We don't know anything about the character
 644                              we are supposed to match.  This means we are
 645                              failing.  */
 646                           goto range_not_matched;
 647
 648                         if (is_seqval)
 649                           lcollseq = cold;
 650                         else
 651                           lcollseq = __collseq_table_lookup (collseq, cold);
 652 # else
 653                         fcollseq = collseq[fn];
 654                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 655 # endif
 656
 657                         is_seqval = false;
 658                         if (cend == L_('[') && *p == L_('.'))
 659                           {
 660                             uint32_t nrules =
 661                               _NL_CURRENT_WORD (LC_COLLATE,
 662                                                 _NL_COLLATE_NRULES);
 663                             const CHAR *startp = p;
 664                             size_t c1 = 0;
 665
 666                             while (1)
 667                               {
 668                                 c = *++p;
 669                                 if (c == L_('.') && p[1] == L_(']'))
 670                                   {
 671                                     p += 2;
 672                                     break;
 673                                   }
 674                                 if (c == '\0')
 675                                   return FNM_NOMATCH;
 676                                 ++c1;
 677                               }
 678
 679                             if (nrules == 0)
 680                               {
 681                                 /* There are no names defined in the
 682                                    collation data.  Therefore we only
 683                                    accept the trivial names consisting
 684                                    of the character itself.  */
 685                                 if (c1 != 1)
 686                                   return FNM_NOMATCH;
 687
 688                                 cend = startp[1];
 689                               }
 690                             else
 691                               {
 692                                 int32_t table_size;
 693                                 const int32_t *symb_table;
 694 # ifdef WIDE_CHAR_VERSION
 695                                 char str[c1];
 696                                 size_t strcnt;
 697 # else
 698 #  define str (startp + 1)
 699 # endif
 700                                 const unsigned char *extra;
 701                                 int32_t idx;
 702                                 int32_t elem;
 703                                 int32_t second;
 704                                 int32_t hash;
 705
 706 # ifdef WIDE_CHAR_VERSION
 707                                 /* We have to convert the name to a single-byte
 708                                    string.  This is possible since the names
 709                                    consist of ASCII characters and the internal
 710                                    representation is UCS4.  */
 711                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 712                                   str[strcnt] = startp[1 + strcnt];
 713 # endif
 714
 715                                 table_size =
 716                                   _NL_CURRENT_WORD (LC_COLLATE,
 717                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 718                                 symb_table = (const int32_t *)
 719                                   _NL_CURRENT (LC_COLLATE,
 720                                                _NL_COLLATE_SYMB_TABLEMB);
 721                                 extra = (const unsigned char *)
 722                                   _NL_CURRENT (LC_COLLATE,
 723                                                _NL_COLLATE_SYMB_EXTRAMB);
 724
 725                                 /* Locate the character in the hashing
 726                                    table.  */
 727                                 hash = elem_hash (str, c1);
 728
 729                                 idx = 0;
 730                                 elem = hash % table_size;
 731                                 if (symb_table[2 * elem] != 0)
 732                                   {
 733                                     second = hash % (table_size - 2) + 1;
 734
 735                                     do
 736                                       {
 737                                         /* First compare the hashing value.  */
 738                                         if (symb_table[2 * elem] == hash
 739                                             && (c1
 740                                                 == extra[symb_table[2 * elem + 1]])
 741                                             && memcmp (str,
 742                                                        &extra[symb_table[2 * elem + 1]
 743                                                               + 1], c1) == 0)
 744                                           {
 745                                             /* Yep, this is the entry.  */
 746                                             idx = symb_table[2 * elem + 1];
 747                                             idx += 1 + extra[idx];
 748                                             break;
 749                                           }
 750
 751                                         /* Next entry.  */
 752                                         elem += second;
 753                                       }
 754                                     while (symb_table[2 * elem] != 0);
 755                                   }
 756
 757                                 if (symb_table[2 * elem] != 0)
 758                                   {
 759                                     /* Compare the byte sequence but only if
 760                                        this is not part of a range.  */
 761 # ifdef WIDE_CHAR_VERSION
 762                                     int32_t *wextra;
 763
 764                                     idx += 1 + extra[idx];
 765                                     /* Adjust for the alignment.  */
 766                                     idx = (idx + 3) & ~4;
 767
 768                                     wextra = (int32_t *) &extra[idx + 4];
 769 # endif
 770                                     /* Get the collation sequence value.  */
 771                                     is_seqval = true;
 772 # ifdef WIDE_CHAR_VERSION
 773                                     cend = wextra[1 + wextra[idx]];
 774 # else
 775                                     /* Adjust for the alignment.  */
 776                                     idx += 1 + extra[idx];
 777                                     idx = (idx + 3) & ~4;
 778                                     cend = *((int32_t *) &extra[idx]);
 779 # endif
 780                                   }
 781                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 782                                   {
 783                                     cend = str[0];
 784                                     c = *p++;
 785                                   }
 786                                 else
 787                                   return FNM_NOMATCH;
 788                               }
 789 # undef str
 790                           }
 791                         else
 792                           {
 793                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 794                               cend = *p++;
 795                             if (cend == L_('\0'))
 796                               return FNM_NOMATCH;
 797                             cend = FOLD (cend);
 798                           }
 799
 800                         /* XXX It is not entirely clear to me how to handle
 801                            characters which are not mentioned in the
 802                            collation specification.  */
 803                         if (
 804 # ifdef WIDE_CHAR_VERSION
 805                             lcollseq == 0xffffffff ||
 806 # endif
 807                             lcollseq <= fcollseq)
 808                           {
 809                             /* We have to look at the upper bound.  */
 810                             uint32_t hcollseq;
 811
 812                             if (is_seqval)
 813                               hcollseq = cend;
 814                             else
 815                               {
 816 # ifdef WIDE_CHAR_VERSION
 817                                 hcollseq =
 818                                   __collseq_table_lookup (collseq, cend);
 819                                 if (hcollseq == ~((uint32_t) 0))
 820                                   {
 821                                     /* Hum, no information about the upper
 822                                        bound.  The matching succeeds if the
 823                                        lower bound is matched exactly.  */
 824                                     if (lcollseq != fcollseq)
 825                                       goto range_not_matched;
 826
 827                                     goto matched;
 828                                   }
 829 # else
 830                                 hcollseq = collseq[cend];
 831 # endif
 832                               }
 833
 834                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 835                               goto matched;
 836                           }
 837 # ifdef WIDE_CHAR_VERSION
 838                       range_not_matched:
 839 # endif
 840 #else
 841                         /* We use a boring value comparison of the character
 842                            values.  This is better than comparing using
 843                            'strcoll' since the latter would have surprising
 844                            and sometimes fatal consequences.  */
 845                         UCHAR cend = *p++;
 846
 847                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 848                           cend = *p++;
 849                         if (cend == L_('\0'))
 850                           return FNM_NOMATCH;
 851
 852                         /* It is a range.  */
 853                         if (cold <= fn && fn <= cend)
 854                           goto matched;
 855 #endif
 856
 857                         c = *p++;
 858                       }
 859                   }
 860
 861                 if (c == L_(']'))
 862                   break;
 863               }
 864
 865             if (!not)
 866               return FNM_NOMATCH;
 867             break;
 868
 869           matched:
 870             /* Skip the rest of the [...] that already matched.  */
 871             do
 872               {
 873               ignore_next:
 874                 c = *p++;
 875
 876                 if (c == L_('\0'))
 877                   /* [... (unterminated) loses.  */
 878                   return FNM_NOMATCH;
 879
 880                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 881                   {
 882                     if (*p == L_('\0'))
 883                       return FNM_NOMATCH;
 884                     /* XXX 1003.2d11 is unclear if this is right.  */
 885                     ++p;
 886                   }
 887                 else if (c == L_('[') && *p == L_(':'))
 888                   {
 889                     int c1 = 0;
 890                     const CHAR *startp = p;
 891
 892                     while (1)
 893                       {
 894                         c = *++p;
 895                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 896                           return FNM_NOMATCH;
 897
 898                         if (*p == L_(':') && p[1] == L_(']'))
 899                           break;
 900
 901                         if (c < L_('a') || c >= L_('z'))
 902                           {
 903                             p = startp;
 904                             goto ignore_next;
 905                           }
 906                       }
 907                     p += 2;
 908                     c = *p++;
 909                   }
 910                 else if (c == L_('[') && *p == L_('='))
 911                   {
 912                     c = *++p;
 913                     if (c == L_('\0'))
 914                       return FNM_NOMATCH;
 915                     c = *++p;
 916                     if (c != L_('=') || p[1] != L_(']'))
 917                       return FNM_NOMATCH;
 918                     p += 2;
 919                     c = *p++;
 920                   }
 921                 else if (c == L_('[') && *p == L_('.'))
 922                   {
 923                     ++p;
 924                     while (1)
 925                       {
 926                         c = *++p;
 927                         if (c == '\0')
 928                           return FNM_NOMATCH;
 929
 930                         if (*p == L_('.') && p[1] == L_(']'))
 931                           break;
 932                       }
 933                     p += 2;
 934                     c = *p++;
 935                   }
 936               }
 937             while (c != L_(']'));
 938             if (not)
 939               return FNM_NOMATCH;
 940           }
 941           break;
 942
 943         case L_('+'):
 944         case L_('@'):
 945         case L_('!'):
 946           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 947             {
 948               int res;
 949
 950               res = EXT (c, p, n, string_end, no_leading_period, flags);
 951               if (res != -1)
 952                 return res;
 953             }
 954           goto normal_match;
 955
 956         case L_('/'):
 957           if (NO_LEADING_PERIOD (flags))
 958             {
 959               if (n == string_end || c != (UCHAR) *n)
 960                 return FNM_NOMATCH;
 961
 962               new_no_leading_period = true;
 963               break;
 964             }
 965           /* FALLTHROUGH */
 966         default:
 967         normal_match:
 968           if (n == string_end || c != FOLD ((UCHAR) *n))
 969             return FNM_NOMATCH;
 970         }
 971
 972       no_leading_period = new_no_leading_period;
 973       ++n;
 974     }
 975
 976   if (n == string_end)
 977     return 0;
 978
 979   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 980     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 981     return 0;
 982
 983   return FNM_NOMATCH;
 984 }
 985
 986
 987 static const CHAR *
 988 internal_function
 989 END (const CHAR *pattern)
 990 {
 991   const CHAR *p = pattern;
 992
 993   while (1)
 994     if (*++p == L_('\0'))
 995       /* This is an invalid pattern.  */
 996       return pattern;
 997     else if (*p == L_('['))
 998       {
 999         /* Handle brackets special.  */
1000         if (posixly_correct == 0)
1001           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1002
1003         /* Skip the not sign.  We have to recognize it because of a possibly
1004            following ']'.  */
1005         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1006           ++p;
1007         /* A leading ']' is recognized as such.  */
1008         if (*p == L_(']'))
1009           ++p;
1010         /* Skip over all characters of the list.  */
1011         while (*p != L_(']'))
1012           if (*p++ == L_('\0'))
1013             /* This is no valid pattern.  */
1014             return pattern;
1015       }
1016     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1017               || *p == L_('!')) && p[1] == L_('('))
1018       p = END (p + 1);
1019     else if (*p == L_(')'))
1020       break;
1021
1022   return p + 1;
1023 }
1024
1025
1026 static int
1027 internal_function
1028 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1029      bool no_leading_period, int flags)
1030 {
1031   const CHAR *startp;
1032   size_t level;
1033   struct patternlist
1034   {
1035     struct patternlist *next;
1036     CHAR str[1];
1037   } *list = NULL;
1038   struct patternlist **lastp = &list;
1039   size_t pattern_len = STRLEN (pattern);
1040   const CHAR *p;
1041   const CHAR *rs;
1042   enum { ALLOCA_LIMIT = 8000 };
1043
1044   /* Parse the pattern.  Store the individual parts in the list.  */
1045   level = 0;
1046   for (startp = p = pattern + 1; ; ++p)
1047     if (*p == L_('\0'))
1048       /* This is an invalid pattern.  */
1049       return -1;
1050     else if (*p == L_('['))
1051       {
1052         /* Handle brackets special.  */
1053         if (posixly_correct == 0)
1054           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1055
1056         /* Skip the not sign.  We have to recognize it because of a possibly
1057            following ']'.  */
1058         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1059           ++p;
1060         /* A leading ']' is recognized as such.  */
1061         if (*p == L_(']'))
1062           ++p;
1063         /* Skip over all characters of the list.  */
1064         while (*p != L_(']'))
1065           if (*p++ == L_('\0'))
1066             /* This is no valid pattern.  */
1067             return -1;
1068       }
1069     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1070               || *p == L_('!')) && p[1] == L_('('))
1071       /* Remember the nesting level.  */
1072       ++level;
1073     else if (*p == L_(')'))
1074       {
1075         if (level-- == 0)
1076           {
1077             /* This means we found the end of the pattern.  */
1078 #define NEW_PATTERN \
1079             struct patternlist *newp;                                         \
1080             size_t plen;                                                      \
1081             size_t plensize;                                                  \
1082             size_t newpsize;                                                  \
1083                                                                               \
1084             plen = (opt == L_('?') || opt == L_('@')                          \
1085                     ? pattern_len                                             \
1086                     : p - startp + 1UL);                                      \
1087             plensize = plen * sizeof (CHAR);                                  \
1088             newpsize = offsetof (struct patternlist, str) + plensize;         \
1089             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1090                 || newpsize < offsetof (struct patternlist, str)              \
1091                 || ALLOCA_LIMIT <= newpsize)                                  \
1092               return -1;                                                      \
1093             newp = (struct patternlist *) alloca (newpsize);                  \
1094             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1095             newp->next = NULL;                                                \
1096             *lastp = newp;                                                    \
1097             lastp = &newp->next
1098             NEW_PATTERN;
1099             break;
1100           }
1101       }
1102     else if (*p == L_('|'))
1103       {
1104         if (level == 0)
1105           {
1106             NEW_PATTERN;
1107             startp = p + 1;
1108           }
1109       }
1110   assert (list != NULL);
1111   assert (p[-1] == L_(')'));
1112 #undef NEW_PATTERN
1113
1114   switch (opt)
1115     {
1116     case L_('*'):
1117       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1118         return 0;
1119       /* FALLTHROUGH */
1120
1121     case L_('+'):
1122       do
1123         {
1124           for (rs = string; rs <= string_end; ++rs)
1125             /* First match the prefix with the current pattern with the
1126                current pattern.  */
1127             if (FCT (list->str, string, rs, no_leading_period,
1128                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1129                 /* This was successful.  Now match the rest with the rest
1130                    of the pattern.  */
1131                 && (FCT (p, rs, string_end,
1132                          rs == string
1133                          ? no_leading_period
1134                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1135                          flags & FNM_FILE_NAME
1136                          ? flags : flags & ~FNM_PERIOD) == 0
1137                     /* This didn't work.  Try the whole pattern.  */
1138                     || (rs != string
1139                         && FCT (pattern - 1, rs, string_end,
1140                                 rs == string
1141                                 ? no_leading_period
1142                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1143                                 flags & FNM_FILE_NAME
1144                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1145               /* It worked.  Signal success.  */
1146               return 0;
1147         }
1148       while ((list = list->next) != NULL);
1149
1150       /* None of the patterns lead to a match.  */
1151       return FNM_NOMATCH;
1152
1153     case L_('?'):
1154       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1155         return 0;
1156       /* FALLTHROUGH */
1157
1158     case L_('@'):
1159       do
1160         /* I cannot believe it but 'strcat' is actually acceptable
1161            here.  Match the entire string with the prefix from the
1162            pattern list and the rest of the pattern following the
1163            pattern list.  */
1164         if (FCT (STRCAT (list->str, p), string, string_end,
1165                  no_leading_period,
1166                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1167           /* It worked.  Signal success.  */
1168           return 0;
1169       while ((list = list->next) != NULL);
1170
1171       /* None of the patterns lead to a match.  */
1172       return FNM_NOMATCH;
1173
1174     case L_('!'):
1175       for (rs = string; rs <= string_end; ++rs)
1176         {
1177           struct patternlist *runp;
1178
1179           for (runp = list; runp != NULL; runp = runp->next)
1180             if (FCT (runp->str, string, rs,  no_leading_period,
1181                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1182               break;
1183
1184           /* If none of the patterns matched see whether the rest does.  */
1185           if (runp == NULL
1186               && (FCT (p, rs, string_end,
1187                        rs == string
1188                        ? no_leading_period
1189                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1190                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1191                   == 0))
1192             /* This is successful.  */
1193             return 0;
1194         }
1195
1196       /* None of the patterns together with the rest of the pattern
1197          lead to a match.  */
1198       return FNM_NOMATCH;
1199
1200     default:
1201       assert (! "Invalid extended matching operator");
1202       break;
1203     }
1204
1205   return -1;
1206 }
1207
1208
1209 #undef FOLD
1210 #undef CHAR
1211 #undef UCHAR
1212 #undef INT
1213 #undef FCT
1214 #undef EXT
1215 #undef END
1216 #undef MEMPCPY
1217 #undef MEMCHR
1218 #undef STRLEN
1219 #undef STRCAT
1220 #undef L_
1221 #undef BTOWC