(strtol): Do not declare if HAVE_DECL_STRTOL.
[debian/tar] / lib / fnmatch_loop.c
/* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
18
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20    it matches, nonzero if not.  */
21 static int FCT (const CHAR *pattern, const CHAR *string,
22                 int no_leading_period, int flags) internal_function;
23
24 static int
25 internal_function
26 FCT (pattern, string, no_leading_period, flags)
27      const CHAR *pattern;
28      const CHAR *string;
29      int no_leading_period;
30      int flags;
31 {
32   register const CHAR *p = pattern, *n = string;
33   register UCHAR c;
34 #ifdef _LIBC
35   const UCHAR *collseq = (const UCHAR *)
36     _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
37 # ifdef WIDE_CHAR_VERSION
38   const wint_t *names = (const wint_t *)
39     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
40   size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
41   size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
42 # endif
43 #endif
44
45   while ((c = *p++) != L('\0'))
46     {
47       c = FOLD (c);
48
49       switch (c)
50         {
51         case L('?'):
52           if (*n == L('\0'))
53             return FNM_NOMATCH;
54           else if (*n == L('/') && (flags & FNM_FILE_NAME))
55             return FNM_NOMATCH;
56           else if (*n == L('.') && no_leading_period
57                    && (n == string
58                        || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
59             return FNM_NOMATCH;
60           break;
61
62         case L('\\'):
63           if (!(flags & FNM_NOESCAPE))
64             {
65               c = *p++;
66               if (c == L('\0'))
67                 /* Trailing \ loses.  */
68                 return FNM_NOMATCH;
69               c = FOLD (c);
70             }
71           if (FOLD ((UCHAR) *n) != c)
72             return FNM_NOMATCH;
73           break;
74
75         case L('*'):
76           if (*n == L('.') && no_leading_period
77               && (n == string
78                   || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
79             return FNM_NOMATCH;
80
81           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
82             {
83               if (*n == L('/') && (flags & FNM_FILE_NAME))
84                 /* A slash does not match a wildcard under FNM_FILE_NAME.  */
85                 return FNM_NOMATCH;
86               else if (c == L('?'))
87                 {
88                   /* A ? needs to match one character.  */
89                   if (*n == L('\0'))
90                     /* There isn't another character; no match.  */
91                     return FNM_NOMATCH;
92                   else
93                     /* One character of the string is consumed in matching
94                        this ? wildcard, so *??? won't match if there are
95                        less than three characters.  */
96                     ++n;
97                 }
98             }
99
100           if (c == L('\0'))
101             /* The wildcard(s) is/are the last element of the pattern.
102                If the name is a file name and contains another slash
103                this does mean it cannot match.  If the FNM_LEADING_DIR
104                flag is set and exactly one slash is following, we have
105                a match.  */
106             {
107               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
108
109               if (flags & FNM_FILE_NAME)
110                 {
111                   const CHAR *slashp = STRCHR (n, L('/'));
112
113                   if (flags & FNM_LEADING_DIR)
114                     {
115                       if (slashp != NULL
116                           && STRCHR (slashp + 1, L('/')) == NULL)
117                         result = 0;
118                     }
119                   else
120                     {
121                       if (slashp == NULL)
122                         result = 0;
123                     }
124                 }
125
126               return result;
127             }
128           else
129             {
130               const CHAR *endp;
131
132               endp = STRCHRNUL (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'));
133
134               if (c == L('['))
135                 {
136                   int flags2 = ((flags & FNM_FILE_NAME)
137                                 ? flags : (flags & ~FNM_PERIOD));
138
139                   for (--p; n < endp; ++n)
140                     if (FCT (p, n, (no_leading_period
141                                     && (n == string
142                                         || (n[-1] == L('/')
143                                             && (flags & FNM_FILE_NAME)))),
144                              flags2) == 0)
145                       return 0;
146                 }
147               else if (c == L('/') && (flags & FNM_FILE_NAME))
148                 {
149                   while (*n != L('\0') && *n != L('/'))
150                     ++n;
151                   if (*n == L('/')
152                       && (FCT (p, n + 1, flags & FNM_PERIOD, flags) == 0))
153                     return 0;
154                 }
155               else
156                 {
157                   int flags2 = ((flags & FNM_FILE_NAME)
158                                 ? flags : (flags & ~FNM_PERIOD));
159
160                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
161                     c = *p;
162                   c = FOLD (c);
163                   for (--p; n < endp; ++n)
164                     if (FOLD ((UCHAR) *n) == c
165                         && (FCT (p, n, (no_leading_period
166                                         && (n == string
167                                             || (n[-1] == L('/')
168                                                 && (flags & FNM_FILE_NAME)))),
169                                  flags2) == 0))
170                       return 0;
171                 }
172             }
173
174           /* If we come here no match is possible with the wildcard.  */
175           return FNM_NOMATCH;
176
177         case L('['):
178           {
179             /* Nonzero if the sense of the character class is inverted.  */
180             static int posixly_correct;
181             register int not;
182             CHAR cold;
183
184             if (posixly_correct == 0)
185               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
186
187             if (*n == L('\0'))
188               return FNM_NOMATCH;
189
190             if (*n == L('.') && no_leading_period
191                 && (n == string
192                     || (n[-1] == L('/') && (flags & FNM_FILE_NAME))))
193               return FNM_NOMATCH;
194
195             if (*n == L('/') && (flags & FNM_FILE_NAME))
196               /* `/' cannot be matched.  */
197               return FNM_NOMATCH;
198
199             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
200             if (not)
201               ++p;
202
203             c = *p++;
204             for (;;)
205               {
206                 UCHAR fn = FOLD ((UCHAR) *n);
207
208                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
209                   {
210                     if (*p == L('\0'))
211                       return FNM_NOMATCH;
212                     c = FOLD ((UCHAR) *p);
213                     ++p;
214
215                     if (c == fn)
216                       goto matched;
217                   }
218                 else if (c == L('[') && *p == L(':'))
219                   {
220                     /* Leave room for the null.  */
221                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
222                     size_t c1 = 0;
223 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
224                     wctype_t wt;
225 #endif
226                     const CHAR *startp = p;
227
228                     for (;;)
229                       {
230                         if (c1 == CHAR_CLASS_MAX_LENGTH)
231                           /* The name is too long and therefore the pattern
232                              is ill-formed.  */
233                           return FNM_NOMATCH;
234
235                         c = *++p;
236                         if (c == L(':') && p[1] == L(']'))
237                           {
238                             p += 2;
239                             break;
240                           }
241                         if (c < L('a') || c >= L('z'))
242                           {
243                             /* This cannot possibly be a character class name.
244                                Match it as a normal range.  */
245                             p = startp;
246                             c = L('[');
247                             goto normal_bracket;
248                           }
249                         str[c1++] = c;
250                       }
251                     str[c1] = L('\0');
252
253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
254                     wt = IS_CHAR_CLASS (str);
255                     if (wt == 0)
256                       /* Invalid character class name.  */
257                       return FNM_NOMATCH;
258
259                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
260                       goto matched;
261 #else
262                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
263                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
264                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
265                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
266                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
267                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
268                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
269                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
270                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
271                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
272                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
273                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
274                       goto matched;
275 #endif
276                   }
277                 else if (c == L('\0'))
278                   /* [ (unterminated) loses.  */
279                   return FNM_NOMATCH;
280                 else
281                   {
282                     c = FOLD (c);
283                   normal_bracket:
284                     if (c == fn)
285                       goto matched;
286
287                     cold = c;
288                     c = *p++;
289
290                     if (c == L('-') && *p != L(']'))
291                       {
292 #if _LIBC
293                         /* We have to find the collation sequence
294                            value for C.  Collation sequence is nothing
295                            we can regularly access.  The sequence
296                            value is defined by the order in which the
297                            definitions of the collation values for the
298                            various characters appear in the source
299                            file.  A strange concept, nowhere
300                            documented.  */
301                         int32_t fseqidx;
302                         int32_t lseqidx;
303                         UCHAR cend = *p++;
304 # ifdef WIDE_CHAR_VERSION
305                         size_t cnt;
306 # endif
307
308                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
309                           cend = *p++;
310                         if (cend == L('\0'))
311                           return FNM_NOMATCH;
312
313 # ifdef WIDE_CHAR_VERSION
314                         /* Search in the `names' array for the characters.  */
315                         fseqidx = fn % size;
316                         cnt = 0;
317                         while (names[fseqidx] != fn)
318                           {
319                             if (++cnt == layers)
320                               /* XXX We don't know anything about
321                                  the character we are supposed to
322                                  match.  This means we are failing.  */
323                               goto range_not_matched;
324
325                             fseqidx += size;
326                           }
327                         lseqidx = cold % size;
328                         cnt = 0;
329                         while (names[lseqidx] != cold)
330                           {
331                             if (++cnt == layers)
332                               {
333                                 lseqidx = -1;
334                                 break;
335                               }
336                             lseqidx += size;
337                           }
338 # else
339                         fseqidx = fn;
340                         lseqidx = cold;
341 # endif
342
343                         /* XXX It is not entirely clear to me how to handle
344                            characters which are not mentioned in the
345                            collation specification.  */
346                         if (
347 # ifdef WIDE_CHAR_VERSION
348                             lseqidx == -1 ||
349 # endif
350                             collseq[lseqidx] <= collseq[fseqidx])
351                           {
352                             /* We have to look at the upper bound.  */
353                             int32_t hseqidx;
354
355                             cend = FOLD (cend);
356 # ifdef WIDE_CHAR_VERSION
357                             hseqidx = cend % size;
358                             cnt = 0;
359                             while (names[hseqidx] != cend)
360                               {
361                                 if (++cnt == layers)
362                                   {
363                                     /* Hum, no information about the upper
364                                        bound.  The matching succeeds if the
365                                        lower bound is matched exactly.  */
366                                     if (lseqidx == -1 || cold != fn)
367                                       goto range_not_matched;
368
369                                     goto matched;
370                                   }
371                               }
372 # else
373                             hseqidx = cend;
374 # endif
375
376                             if (
377 # ifdef WIDE_CHAR_VERSION
378                                 (lseqidx == -1
379                                  && collseq[fseqidx] == collseq[hseqidx]) ||
380 # endif
381                                 collseq[fseqidx] <= collseq[hseqidx])
382                               goto matched;
383                           }
384 # ifdef WIDE_CHAR_VERSION
385                       range_not_matched:
386 # endif
387 #else
388                         /* We use a boring value comparison of the character
389                            values.  This is better than comparing using
390                            `strcoll' since the latter would have surprising
391                            and sometimes fatal consequences.  */
392                         UCHAR cend = *p++;
393
394                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
395                           cend = *p++;
396                         if (cend == L('\0'))
397                           return FNM_NOMATCH;
398
399                         /* It is a range.  */
400                         if (cold <= fc && fc <= c)
401                           goto matched;
402 #endif
403
404                         c = *p++;
405                       }
406                   }
407
408                 if (c == L(']'))
409                   break;
410               }
411
412             if (!not)
413               return FNM_NOMATCH;
414             break;
415
416           matched:
417             /* Skip the rest of the [...] that already matched.  */
418             while (c != L(']'))
419               {
420                 if (c == L('\0'))
421                   /* [... (unterminated) loses.  */
422                   return FNM_NOMATCH;
423
424                 c = *p++;
425                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
426                   {
427                     if (*p == L('\0'))
428                       return FNM_NOMATCH;
429                     /* XXX 1003.2d11 is unclear if this is right.  */
430                     ++p;
431                   }
432                 else if (c == L('[') && *p == L(':'))
433                   {
434                     do
435                       if (*++p == L('\0'))
436                         return FNM_NOMATCH;
437                     while (*p != L(':') || p[1] == L(']'));
438                     p += 2;
439                     c = *p;
440                   }
441               }
442             if (not)
443               return FNM_NOMATCH;
444           }
445           break;
446
447         default:
448           if (c != FOLD ((UCHAR) *n))
449             return FNM_NOMATCH;
450         }
451
452       ++n;
453     }
454
455   if (*n == '\0')
456     return 0;
457
458   if ((flags & FNM_LEADING_DIR) && *n == L('/'))
459     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
460     return 0;
461
462   return FNM_NOMATCH;
463 }
464
/* Drop the per-instantiation macro definitions so that this file can be
   included again with a different set of them.  */
#undef BTOWC
#undef CHAR
#undef FCT
#undef FOLD
#undef L
#undef STRCHR
#undef STRCHRNUL
#undef STRCOLL
#undef SUFFIX
#undef UCHAR