git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * All Rights Reserved.
   5  *
   6  * Permission to use, copy, modify, distribute, and sell this software and its
   7  * documentation for any purpose is hereby granted without fee, provided that
   8  * the above copyright notice appear in all copies and that both that
   9  * copyright notice and this permission notice appear in supporting
  10  * documentation, and that the name of U.M. not be used in advertising or
  11  * publicity pertaining to distribution of the software without specific,
  12  * written prior permission.  U.M. makes no representations about the
  13  * suitability of this software for any purpose.  It is provided "as is"
  14  * without express or implied warranty.
  15  *
  16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22  *
  23  * Authors: the Amanda Development Team.  Its members are listed in a
  24  * file named AUTHORS, in the root directory of this distribution.
  25  */
  26 /*
  27  * $Id: match.c,v 1.10.4.1.4.1.2.4.2.2 2004/12/21 14:20:20 martinea Exp $
  28  *
  29  * functions for checking and matching regular expressions
  30  */
  31
  32 #include "amanda.h"
  33 #include "regex.h"
  34
  35 char *validate_regexp(regex)
  36 char *regex;
  37 {
  38     regex_t regc;
  39     int result;
  40     static char errmsg[STR_SIZE];
  41
  42     if ((result = regcomp(&regc, regex,
  43                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  44       regerror(result, &regc, errmsg, sizeof(errmsg));
  45       return errmsg;
  46     }
  47
  48     regfree(&regc);
  49
  50     return NULL;
  51 }
  52
  53 char *clean_regex(regex)
  54 char *regex;
  55 {
  56     char *result;
  57     int j;
  58     size_t i;
  59     result = alloc(2*strlen(regex)+1);
  60
  61     for(i=0,j=0;i<strlen(regex);i++) {
  62         if(!isalnum((int)regex[i]))
  63             result[j++]='\\';
  64         result[j++]=regex[i];
  65     }
  66     result[j++] = '\0';
  67     return result;
  68 }
  69
  70 int match(regex, str)
  71 char *regex, *str;
  72 {
  73     regex_t regc;
  74     int result;
  75     char errmsg[STR_SIZE];
  76
  77     if((result = regcomp(&regc, regex,
  78                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  79         regerror(result, &regc, errmsg, sizeof(errmsg));
  80         error("regex \"%s\": %s", regex, errmsg);
  81     }
  82
  83     if((result = regexec(&regc, str, 0, 0, 0)) != 0
  84        && result != REG_NOMATCH) {
  85         regerror(result, &regc, errmsg, sizeof(errmsg));
  86         error("regex \"%s\": %s", regex, errmsg);
  87     }
  88
  89     regfree(&regc);
  90
  91     return result == 0;
  92 }
  93
  94 char *validate_glob(glob)
  95 char *glob;
  96 {
  97     char *regex = NULL;
  98     regex_t regc;
  99     int result;
 100     static char errmsg[STR_SIZE];
 101
 102     regex = glob_to_regex(glob);
 103     if ((result = regcomp(&regc, regex,
 104                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 105       regerror(result, &regc, errmsg, sizeof(errmsg));
 106       amfree(regex);
 107       return errmsg;
 108     }
 109
 110     regfree(&regc);
 111     amfree(regex);
 112
 113     return NULL;
 114 }
 115
 116 int match_glob(glob, str)
 117 char *glob, *str;
 118 {
 119     char *regex = NULL;
 120     regex_t regc;
 121     int result;
 122     char errmsg[STR_SIZE];
 123
 124     regex = glob_to_regex(glob);
 125     if((result = regcomp(&regc, regex,
 126                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 127         regerror(result, &regc, errmsg, sizeof(errmsg));
 128         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 129     }
 130
 131     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 132        && result != REG_NOMATCH) {
 133         regerror(result, &regc, errmsg, sizeof(errmsg));
 134         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 135     }
 136
 137     regfree(&regc);
 138     amfree(regex);
 139
 140     return result == 0;
 141 }
 142
 143 char *glob_to_regex(glob)
 144 char *glob;
 145 {
 146     char *regex;
 147     char *r;
 148     size_t len;
 149     int ch;
 150     int last_ch;
 151
 152     /*
 153      * Allocate an area to convert into.  The worst case is a five to
 154      * one expansion.
 155      */
 156     len = strlen(glob);
 157     regex = alloc(1 + len * 5 + 1 + 1);
 158
 159     /*
 160      * Do the conversion:
 161      *
 162      *  ?      -> [^/]
 163      *  *      -> [^/]*
 164      *  [!...] -> [^...]
 165      *
 166      * The following are given a leading backslash to protect them
 167      * unless they already have a backslash:
 168      *
 169      *   ( ) { } + . ^ $ |
 170      *
 171      * Put a leading ^ and trailing $ around the result.  If the last
 172      * non-escaped character is \ leave the $ off to cause a syntax
 173      * error when the regex is compiled.
 174      */
 175
 176     r = regex;
 177     *r++ = '^';
 178     last_ch = '\0';
 179     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 180         if (last_ch == '\\') {
 181             *r++ = ch;
 182             ch = '\0';                  /* so last_ch != '\\' next time */
 183         } else if (last_ch == '[' && ch == '!') {
 184             *r++ = '^';
 185         } else if (ch == '\\') {
 186             *r++ = ch;
 187         } else if (ch == '*' || ch == '?') {
 188             *r++ = '[';
 189             *r++ = '^';
 190             *r++ = '/';
 191             *r++ = ']';
 192             if (ch == '*') {
 193                 *r++ = '*';
 194             }
 195         } else if (ch == '('
 196                    || ch == ')'
 197                    || ch == '{'
 198                    || ch == '}'
 199                    || ch == '+'
 200                    || ch == '.'
 201                    || ch == '^'
 202                    || ch == '$'
 203                    || ch == '|') {
 204             *r++ = '\\';
 205             *r++ = ch;
 206         } else {
 207             *r++ = ch;
 208         }
 209     }
 210     if (last_ch != '\\') {
 211         *r++ = '$';
 212     }
 213     *r = '\0';
 214
 215     return regex;
 216 }
 217
 218
 219 int match_tar(glob, str)
 220 char *glob, *str;
 221 {
 222     char *regex = NULL;
 223     regex_t regc;
 224     int result;
 225     char errmsg[STR_SIZE];
 226
 227     regex = tar_to_regex(glob);
 228     if((result = regcomp(&regc, regex,
 229                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 230         regerror(result, &regc, errmsg, sizeof(errmsg));
 231         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 232     }
 233
 234     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 235        && result != REG_NOMATCH) {
 236         regerror(result, &regc, errmsg, sizeof(errmsg));
 237         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 238     }
 239
 240     regfree(&regc);
 241     amfree(regex);
 242
 243     return result == 0;
 244 }
 245
 246 char *tar_to_regex(glob)
 247 char *glob;
 248 {
 249     char *regex;
 250     char *r;
 251     size_t len;
 252     int ch;
 253     int last_ch;
 254
 255     /*
 256      * Allocate an area to convert into.  The worst case is a five to
 257      * one expansion.
 258      */
 259     len = strlen(glob);
 260     regex = alloc(1 + len * 5 + 1 + 1);
 261
 262     /*
 263      * Do the conversion:
 264      *
 265      *  ?      -> [^/]
 266      *  *      -> .*
 267      *  [!...] -> [^...]
 268      *
 269      * The following are given a leading backslash to protect them
 270      * unless they already have a backslash:
 271      *
 272      *   ( ) { } + . ^ $ |
 273      *
 274      * Put a leading ^ and trailing $ around the result.  If the last
 275      * non-escaped character is \ leave the $ off to cause a syntax
 276      * error when the regex is compiled.
 277      */
 278
 279     r = regex;
 280     *r++ = '^';
 281     last_ch = '\0';
 282     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 283         if (last_ch == '\\') {
 284             *r++ = ch;
 285             ch = '\0';                  /* so last_ch != '\\' next time */
 286         } else if (last_ch == '[' && ch == '!') {
 287             *r++ = '^';
 288         } else if (ch == '\\') {
 289             *r++ = ch;
 290         } else if (ch == '*') {
 291             *r++ = '.';
 292             *r++ = '*';
 293         } else if (ch == '?') {
 294             *r++ = '[';
 295             *r++ = '^';
 296             *r++ = '/';
 297             *r++ = ']';
 298         } else if (ch == '('
 299                    || ch == ')'
 300                    || ch == '{'
 301                    || ch == '}'
 302                    || ch == '+'
 303                    || ch == '.'
 304                    || ch == '^'
 305                    || ch == '$'
 306                    || ch == '|') {
 307             *r++ = '\\';
 308             *r++ = ch;
 309         } else {
 310             *r++ = ch;
 311         }
 312     }
 313     if (last_ch != '\\') {
 314         *r++ = '$';
 315     }
 316     *r = '\0';
 317
 318     return regex;
 319 }
 320
 321
 322 int match_word(glob, word, separator)
 323 char *glob, *word;
 324 char separator;
 325 {
 326     char *regex;
 327     char *r;
 328     size_t  len;
 329     int  ch;
 330     int  last_ch;
 331     int  next_ch;
 332     size_t  lenword;
 333     char *nword;
 334     char *nglob;
 335     char *g, *w;
 336     int  i;
 337
 338     lenword = strlen(word);
 339     nword = (char *)alloc(lenword + 3);
 340
 341     r = nword;
 342     w = word;
 343     if(lenword == 1 && *w == separator) {
 344         *r++ = separator;
 345         *r++ = separator;
 346     }
 347     else {
 348         if(*w != separator)
 349             *r++ = separator;
 350         while(*w != '\0')
 351             *r++ = *w++;
 352         if(*(r-1) != separator)
 353             *r++ = separator;
 354     }
 355     *r = '\0';
 356
 357     /*
 358      * Allocate an area to convert into.  The worst case is a six to
 359      * one expansion.
 360      */
 361     len = strlen(glob);
 362     regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2);
 363     r = regex;
 364     nglob = stralloc(glob);
 365     g = nglob;
 366
 367     if((len == 1 && nglob[0] == separator) ||
 368        (len == 2 && nglob[0] == '^' && nglob[1] == separator) ||
 369        (len == 2 && nglob[0] == separator && nglob[1] == '$') ||
 370        (len == 3 && nglob[0] == '^' && nglob[1] == separator &&
 371         nglob[2] == '$')) {
 372         *r++ = '^';
 373         *r++ = '\\';
 374         *r++ = separator;
 375         *r++ = '\\';
 376         *r++ = separator;
 377         *r++ = '$';
 378     }
 379     else {
 380         /*
 381          * Do the conversion:
 382          *
 383          *  ?      -> [^\separator]
 384          *  *      -> [^\separator]*
 385          *  [!...] -> [^...]
 386          *  **     -> .*
 387          *
 388          * The following are given a leading backslash to protect them
 389          * unless they already have a backslash:
 390          *
 391          *   ( ) { } + . ^ $ |
 392          *
 393          * If the last
 394          * non-escaped character is \ leave it to cause a syntax
 395          * error when the regex is compiled.
 396          */
 397
 398         if(*g == '^') {
 399             *r++ = '^';
 400             *r++ = '\\';        /* escape the separator */
 401             *r++ = separator;
 402             g++;
 403             if(*g == separator) g++;
 404         }
 405         else if(*g != separator) {
 406             *r++ = '\\';        /* add a leading \separator */
 407             *r++ = separator;
 408         }
 409         last_ch = '\0';
 410         for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) {
 411             next_ch = *g;
 412             if (last_ch == '\\') {
 413                 *r++ = ch;
 414                 ch = '\0';              /* so last_ch != '\\' next time */
 415             } else if (last_ch == '[' && ch == '!') {
 416                 *r++ = '^';
 417             } else if (ch == '\\') {
 418                 *r++ = ch;
 419             } else if (ch == '*' || ch == '?') {
 420                 if(ch == '*' && next_ch == '*') {
 421                     *r++ = '.';
 422                     g++;
 423                 }
 424                 else {
 425                     *r++ = '[';
 426                     *r++ = '^';
 427                     *r++ = '\\';
 428                     *r++ = separator;
 429                     *r++ = ']';
 430                 }
 431                 if (ch == '*') {
 432                     *r++ = '*';
 433                 }
 434             } else if (ch == '$' && next_ch == '\0') {
 435                 if(last_ch != separator) {
 436                     *r++ = '\\';
 437                     *r++ = separator;
 438                 }
 439                 *r++ = ch;
 440             } else if (   ch == '('
 441                        || ch == ')'
 442                        || ch == '{'
 443                        || ch == '}'
 444                        || ch == '+'
 445                        || ch == '.'
 446                        || ch == '^'
 447                        || ch == '$'
 448                        || ch == '|') {
 449                 *r++ = '\\';
 450                 *r++ = ch;
 451             } else {
 452                 *r++ = ch;
 453             }
 454         }
 455         if(last_ch != '\\') {
 456             if(last_ch != separator && last_ch != '$') {
 457                 *r++ = '\\';
 458                 *r++ = separator;               /* add a trailing \separator */
 459             }
 460         }
 461     }
 462     *r = '\0';
 463
 464     i = match(regex,nword);
 465
 466     amfree(nword);
 467     amfree(nglob);
 468     amfree(regex);
 469     return i;
 470 }
 471
 472
 473 int match_host(glob, host)
 474 char *glob, *host;
 475 {
 476     char *lglob, *lhost;
 477     char *c, *d;
 478     int i;
 479
 480
 481     lglob = (char *)alloc(strlen(glob)+1);
 482     c = lglob, d=glob;
 483     while( *d != '\0')
 484         *c++ = tolower(*d++);
 485     *c = *d;
 486
 487     lhost = (char *)alloc(strlen(host)+1);
 488     c = lhost, d=host;
 489     while( *d != '\0')
 490         *c++ = tolower(*d++);
 491     *c = *d;
 492
 493     i = match_word(lglob, lhost, '.');
 494     amfree(lglob);
 495     amfree(lhost);
 496     return i;
 497 }
 498
 499
 500 int match_disk(glob, disk)
 501 char *glob, *disk;
 502 {
 503     int i;
 504     i = match_word(glob, disk, '/');
 505     return i;
 506 }
 507
 508 int match_datestamp(dateexp, datestamp)
 509 char *dateexp, *datestamp;
 510 {
 511     char *dash;
 512     size_t len, len_suffix;
 513     int len_prefix;
 514     char firstdate[100], lastdate[100];
 515     char mydateexp[100];
 516     int match_exact;
 517
 518     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 519         error("Illegal datestamp expression %s",dateexp);
 520     }
 521
 522     if(dateexp[0] == '^') {
 523         strncpy(mydateexp, dateexp+1, strlen(dateexp)-1);
 524         mydateexp[strlen(dateexp)-1] = '\0';
 525     }
 526     else {
 527         strncpy(mydateexp, dateexp, strlen(dateexp));
 528         mydateexp[strlen(dateexp)] = '\0';
 529     }
 530
 531     if(mydateexp[strlen(mydateexp)] == '$') {
 532         match_exact = 1;
 533         mydateexp[strlen(mydateexp)] = '\0';
 534     }
 535     else
 536         match_exact = 0;
 537
 538     if((dash = strchr(mydateexp,'-'))) {
 539         if(match_exact == 1) {
 540             error("Illegal datestamp expression %s",dateexp);
 541         }
 542         len = dash - mydateexp;
 543         len_suffix = strlen(dash) - 1;
 544         len_prefix = len - len_suffix;
 545
 546         if(len_prefix < 0) {
 547             error("Illegal datestamp expression %s",dateexp);
 548         }
 549
 550         dash++;
 551         strncpy(firstdate, mydateexp, len);
 552         firstdate[len] = '\0';
 553         strncpy(lastdate, mydateexp, len_prefix);
 554         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
 555         lastdate[len] = '\0';
 556         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
 557                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
 558     }
 559     else {
 560         if(match_exact == 1) {
 561             return (strcmp(datestamp, mydateexp) == 0);
 562         }
 563         else {
 564             return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
 565         }
 566     }
 567 }