git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * All Rights Reserved.
   5  *
   6  * Permission to use, copy, modify, distribute, and sell this software and its
   7  * documentation for any purpose is hereby granted without fee, provided that
   8  * the above copyright notice appear in all copies and that both that
   9  * copyright notice and this permission notice appear in supporting
  10  * documentation, and that the name of U.M. not be used in advertising or
  11  * publicity pertaining to distribution of the software without specific,
  12  * written prior permission.  U.M. makes no representations about the
  13  * suitability of this software for any purpose.  It is provided "as is"
  14  * without express or implied warranty.
  15  *
  16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22  *
  23  * Authors: the Amanda Development Team.  Its members are listed in a
  24  * file named AUTHORS, in the root directory of this distribution.
  25  */
  26 /*
  27  * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $
  28  *
  29  * functions for checking and matching regular expressions
  30  */
  31
  32 #include "amanda.h"
  33 #include <regex.h>
  34
  35 static int match_word(const char *glob, const char *word, const char separator);
  36
  37 char *
  38 validate_regexp(
  39     const char *        regex)
  40 {
  41     regex_t regc;
  42     int result;
  43     static char errmsg[STR_SIZE];
  44
  45     if ((result = regcomp(&regc, regex,
  46                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  47       regerror(result, &regc, errmsg, SIZEOF(errmsg));
  48       return errmsg;
  49     }
  50
  51     regfree(&regc);
  52
  53     return NULL;
  54 }
  55
  56 char *
  57 clean_regex(
  58     const char *        regex)
  59 {
  60     char *result;
  61     int j;
  62     size_t i;
  63     result = alloc(2*strlen(regex)+1);
  64
  65     for(i=0,j=0;i<strlen(regex);i++) {
  66         if(!isalnum((int)regex[i]))
  67             result[j++]='\\';
  68         result[j++]=regex[i];
  69     }
  70     result[j] = '\0';
  71     return result;
  72 }
  73
  74 int
  75 match(
  76     const char *        regex,
  77     const char *        str)
  78 {
  79     regex_t regc;
  80     int result;
  81     char errmsg[STR_SIZE];
  82
  83     if((result = regcomp(&regc, regex,
  84                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  85         regerror(result, &regc, errmsg, SIZEOF(errmsg));
  86         error(_("regex \"%s\": %s"), regex, errmsg);
  87         /*NOTREACHED*/
  88     }
  89
  90     if((result = regexec(&regc, str, 0, 0, 0)) != 0
  91        && result != REG_NOMATCH) {
  92         regerror(result, &regc, errmsg, SIZEOF(errmsg));
  93         error(_("regex \"%s\": %s"), regex, errmsg);
  94         /*NOTREACHED*/
  95     }
  96
  97     regfree(&regc);
  98
  99     return result == 0;
 100 }
 101
 102 int
 103 match_no_newline(
 104     const char *        regex,
 105     const char *        str)
 106 {
 107     regex_t regc;
 108     int result;
 109     char errmsg[STR_SIZE];
 110
 111     if((result = regcomp(&regc, regex,
 112                          REG_EXTENDED|REG_NOSUB)) != 0) {
 113         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 114         error(_("regex \"%s\": %s"), regex, errmsg);
 115         /*NOTREACHED*/
 116     }
 117
 118     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 119        && result != REG_NOMATCH) {
 120         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 121         error(_("regex \"%s\": %s"), regex, errmsg);
 122         /*NOTREACHED*/
 123     }
 124
 125     regfree(&regc);
 126
 127     return result == 0;
 128 }
 129
 130 char *
 131 validate_glob(
 132     const char *        glob)
 133 {
 134     char *regex;
 135     regex_t regc;
 136     int result;
 137     static char errmsg[STR_SIZE];
 138
 139     regex = glob_to_regex(glob);
 140     if ((result = regcomp(&regc, regex,
 141                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 142       regerror(result, &regc, errmsg, SIZEOF(errmsg));
 143       amfree(regex);
 144       return errmsg;
 145     }
 146
 147     regfree(&regc);
 148     amfree(regex);
 149
 150     return NULL;
 151 }
 152
 153 int
 154 match_glob(
 155     const char *        glob,
 156     const char *        str)
 157 {
 158     char *regex;
 159     regex_t regc;
 160     int result;
 161     char errmsg[STR_SIZE];
 162
 163     regex = glob_to_regex(glob);
 164     if((result = regcomp(&regc, regex,
 165                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 166         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 167         error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
 168         /*NOTREACHED*/
 169     }
 170
 171     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 172        && result != REG_NOMATCH) {
 173         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 174         error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
 175         /*NOTREACHED*/
 176     }
 177
 178     regfree(&regc);
 179     amfree(regex);
 180
 181     return result == 0;
 182 }
 183
 184 char *
 185 glob_to_regex(
 186     const char *        glob)
 187 {
 188     char *regex;
 189     char *r;
 190     size_t len;
 191     int ch;
 192     int last_ch;
 193
 194     /*
 195      * Allocate an area to convert into.  The worst case is a five to
 196      * one expansion.
 197      */
 198     len = strlen(glob);
 199     regex = alloc(1 + len * 5 + 1 + 1);
 200
 201     /*
 202      * Do the conversion:
 203      *
 204      *  ?      -> [^/]
 205      *  *      -> [^/]*
 206      *  [!...] -> [^...]
 207      *
 208      * The following are given a leading backslash to protect them
 209      * unless they already have a backslash:
 210      *
 211      *   ( ) { } + . ^ $ |
 212      *
 213      * Put a leading ^ and trailing $ around the result.  If the last
 214      * non-escaped character is \ leave the $ off to cause a syntax
 215      * error when the regex is compiled.
 216      */
 217
 218     r = regex;
 219     *r++ = '^';
 220     last_ch = '\0';
 221     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 222         if (last_ch == '\\') {
 223             *r++ = (char)ch;
 224             ch = '\0';                  /* so last_ch != '\\' next time */
 225         } else if (last_ch == '[' && ch == '!') {
 226             *r++ = '^';
 227         } else if (ch == '\\') {
 228             *r++ = (char)ch;
 229         } else if (ch == '*' || ch == '?') {
 230             *r++ = '[';
 231             *r++ = '^';
 232             *r++ = '/';
 233             *r++ = ']';
 234             if (ch == '*') {
 235                 *r++ = '*';
 236             }
 237         } else if (ch == '('
 238                    || ch == ')'
 239                    || ch == '{'
 240                    || ch == '}'
 241                    || ch == '+'
 242                    || ch == '.'
 243                    || ch == '^'
 244                    || ch == '$'
 245                    || ch == '|') {
 246             *r++ = '\\';
 247             *r++ = (char)ch;
 248         } else {
 249             *r++ = (char)ch;
 250         }
 251     }
 252     if (last_ch != '\\') {
 253         *r++ = '$';
 254     }
 255     *r = '\0';
 256
 257     return regex;
 258 }
 259
 260
 261 int
 262 match_tar(
 263     const char *        glob,
 264     const char *        str)
 265 {
 266     char *regex;
 267     regex_t regc;
 268     int result;
 269     char errmsg[STR_SIZE];
 270
 271     regex = tar_to_regex(glob);
 272     if((result = regcomp(&regc, regex,
 273                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 274         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 275         error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
 276         /*NOTREACHED*/
 277     }
 278
 279     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 280        && result != REG_NOMATCH) {
 281         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 282         error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
 283         /*NOTREACHED*/
 284     }
 285
 286     regfree(&regc);
 287     amfree(regex);
 288
 289     return result == 0;
 290 }
 291
 292 char *
 293 tar_to_regex(
 294     const char *        glob)
 295 {
 296     char *regex;
 297     char *r;
 298     size_t len;
 299     int ch;
 300     int last_ch;
 301
 302     /*
 303      * Allocate an area to convert into.  The worst case is a five to
 304      * one expansion.
 305      */
 306     len = strlen(glob);
 307     regex = alloc(1 + len * 5 + 1 + 1);
 308
 309     /*
 310      * Do the conversion:
 311      *
 312      *  ?      -> [^/]
 313      *  *      -> .*
 314      *  [!...] -> [^...]
 315      *
 316      * The following are given a leading backslash to protect them
 317      * unless they already have a backslash:
 318      *
 319      *   ( ) { } + . ^ $ |
 320      *
 321      * Put a leading ^ and trailing $ around the result.  If the last
 322      * non-escaped character is \ leave the $ off to cause a syntax
 323      * error when the regex is compiled.
 324      */
 325
 326     r = regex;
 327     *r++ = '^';
 328     last_ch = '\0';
 329     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 330         if (last_ch == '\\') {
 331             *r++ = (char)ch;
 332             ch = '\0';                  /* so last_ch != '\\' next time */
 333         } else if (last_ch == '[' && ch == '!') {
 334             *r++ = '^';
 335         } else if (ch == '\\') {
 336             *r++ = (char)ch;
 337         } else if (ch == '*') {
 338             *r++ = '.';
 339             *r++ = '*';
 340         } else if (ch == '?') {
 341             *r++ = '[';
 342             *r++ = '^';
 343             *r++ = '/';
 344             *r++ = ']';
 345         } else if (ch == '('
 346                    || ch == ')'
 347                    || ch == '{'
 348                    || ch == '}'
 349                    || ch == '+'
 350                    || ch == '.'
 351                    || ch == '^'
 352                    || ch == '$'
 353                    || ch == '|') {
 354             *r++ = '\\';
 355             *r++ = (char)ch;
 356         } else {
 357             *r++ = (char)ch;
 358         }
 359     }
 360     if (last_ch != '\\') {
 361         *r++ = '$';
 362     }
 363     *r = '\0';
 364
 365     return regex;
 366 }
 367
 368
 369 static int
 370 match_word(
 371     const char *        glob,
 372     const char *        word,
 373     const char          separator)
 374 {
 375     char *regex;
 376     char *r;
 377     size_t  len;
 378     int  ch;
 379     int  last_ch;
 380     int  next_ch;
 381     size_t  lenword;
 382     char *nword;
 383     char *nglob;
 384     char *g;
 385     const char *w;
 386     int  i;
 387
 388     lenword = strlen(word);
 389     nword = (char *)alloc(lenword + 3);
 390
 391     r = nword;
 392     w = word;
 393     if(lenword == 1 && *w == separator) {
 394         *r++ = separator;
 395         *r++ = separator;
 396     }
 397     else {
 398         if(*w != separator)
 399             *r++ = separator;
 400         while(*w != '\0')
 401             *r++ = *w++;
 402         if(*(r-1) != separator)
 403             *r++ = separator;
 404     }
 405     *r = '\0';
 406
 407     /*
 408      * Allocate an area to convert into.  The worst case is a six to
 409      * one expansion.
 410      */
 411     len = strlen(glob);
 412     regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2);
 413     r = regex;
 414     nglob = stralloc(glob);
 415     g = nglob;
 416
 417     if((len == 1 && nglob[0] == separator) ||
 418        (len == 2 && nglob[0] == '^' && nglob[1] == separator) ||
 419        (len == 2 && nglob[0] == separator && nglob[1] == '$') ||
 420        (len == 3 && nglob[0] == '^' && nglob[1] == separator &&
 421         nglob[2] == '$')) {
 422         *r++ = '^';
 423         *r++ = '\\';
 424         *r++ = separator;
 425         *r++ = '\\';
 426         *r++ = separator;
 427         *r++ = '$';
 428     }
 429     else {
 430         /*
 431          * Do the conversion:
 432          *
 433          *  ?      -> [^\separator]
 434          *  *      -> [^\separator]*
 435          *  [!...] -> [^...]
 436          *  **     -> .*
 437          *
 438          * The following are given a leading backslash to protect them
 439          * unless they already have a backslash:
 440          *
 441          *   ( ) { } + . ^ $ |
 442          *
 443          * If the last
 444          * non-escaped character is \ leave it to cause a syntax
 445          * error when the regex is compiled.
 446          */
 447
 448         if(*g == '^') {
 449             *r++ = '^';
 450             *r++ = '\\';        /* escape the separator */
 451             *r++ = separator;
 452             g++;
 453             if(*g == separator) g++;
 454         }
 455         else if(*g != separator) {
 456             *r++ = '\\';        /* add a leading \separator */
 457             *r++ = separator;
 458         }
 459         last_ch = '\0';
 460         for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) {
 461             next_ch = *g;
 462             if (last_ch == '\\') {
 463                 *r++ = (char)ch;
 464                 ch = '\0';              /* so last_ch != '\\' next time */
 465             } else if (last_ch == '[' && ch == '!') {
 466                 *r++ = '^';
 467             } else if (ch == '\\') {
 468                 *r++ = (char)ch;
 469             } else if (ch == '*' || ch == '?') {
 470                 if(ch == '*' && next_ch == '*') {
 471                     *r++ = '.';
 472                     g++;
 473                 }
 474                 else {
 475                     *r++ = '[';
 476                     *r++ = '^';
 477                     *r++ = '\\';
 478                     *r++ = separator;
 479                     *r++ = ']';
 480                 }
 481                 if (ch == '*') {
 482                     *r++ = '*';
 483                 }
 484             } else if (ch == '$' && next_ch == '\0') {
 485                 if(last_ch != separator) {
 486                     *r++ = '\\';
 487                     *r++ = separator;
 488                 }
 489                 *r++ = (char)ch;
 490             } else if (   ch == '('
 491                        || ch == ')'
 492                        || ch == '{'
 493                        || ch == '}'
 494                        || ch == '+'
 495                        || ch == '.'
 496                        || ch == '^'
 497                        || ch == '$'
 498                        || ch == '|') {
 499                 *r++ = '\\';
 500                 *r++ = (char)ch;
 501             } else {
 502                 *r++ = (char)ch;
 503             }
 504         }
 505         if(last_ch != '\\') {
 506             if(last_ch != separator && last_ch != '$') {
 507                 *r++ = '\\';
 508                 *r++ = separator;               /* add a trailing \separator */
 509             }
 510         }
 511     }
 512     *r = '\0';
 513
 514     i = match(regex,nword);
 515
 516     amfree(nword);
 517     amfree(nglob);
 518     amfree(regex);
 519     return i;
 520 }
 521
 522
 523 int
 524 match_host(
 525     const char *        glob,
 526     const char *        host)
 527 {
 528     char *lglob, *lhost;
 529     char *c;
 530     const char *d;
 531     int i;
 532
 533
 534     lglob = (char *)alloc(strlen(glob)+1);
 535     c = lglob, d=glob;
 536     while( *d != '\0')
 537         *c++ = (char)tolower(*d++);
 538     *c = *d;
 539
 540     lhost = (char *)alloc(strlen(host)+1);
 541     c = lhost, d=host;
 542     while( *d != '\0')
 543         *c++ = (char)tolower(*d++);
 544     *c = *d;
 545
 546     i = match_word(lglob, lhost, (int)'.');
 547     amfree(lglob);
 548     amfree(lhost);
 549     return i;
 550 }
 551
 552
 553 int
 554 match_disk(
 555     const char *        glob,
 556     const char *        disk)
 557 {
 558     return match_word(glob, disk, '/');
 559 }
 560
 561 static int
 562 alldigits(
 563     const char *str)
 564 {
 565     while (*str) {
 566         if (!isdigit((int)*(str++)))
 567             return 0;
 568     }
 569     return 1;
 570 }
 571
 572 int
 573 match_datestamp(
 574     const char *        dateexp,
 575     const char *        datestamp)
 576 {
 577     char *dash;
 578     size_t len, len_suffix;
 579     size_t len_prefix;
 580     char firstdate[100], lastdate[100];
 581     char mydateexp[100];
 582     int match_exact;
 583
 584     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 585         goto illegal;
 586     }
 587
 588     /* strip and ignore an initial "^" */
 589     if(dateexp[0] == '^') {
 590         strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
 591         mydateexp[sizeof(mydateexp)-1] = '\0';
 592     }
 593     else {
 594         strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
 595         mydateexp[sizeof(mydateexp)-1] = '\0';
 596     }
 597
 598     if(mydateexp[strlen(mydateexp)-1] == '$') {
 599         match_exact = 1;
 600         mydateexp[strlen(mydateexp)-1] = '\0';  /* strip the trailing $ */
 601     }
 602     else
 603         match_exact = 0;
 604
 605     /* a single dash represents a date range */
 606     if((dash = strchr(mydateexp,'-'))) {
 607         if(match_exact == 1 || strchr(dash+1, '-')) {
 608             goto illegal;
 609         }
 610
 611         /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
 612
 613         len = (size_t)(dash - mydateexp);   /* length of XXXYYYY */
 614         len_suffix = strlen(dash) - 1;  /* length of ZZZZ */
 615         if (len_suffix > len) goto illegal;
 616         len_prefix = len - len_suffix; /* length of XXX */
 617
 618         dash++;
 619
 620         strncpy(firstdate, mydateexp, len);
 621         firstdate[len] = '\0';
 622         strncpy(lastdate, mydateexp, len_prefix);
 623         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
 624         lastdate[len] = '\0';
 625         if (!alldigits(firstdate) || !alldigits(lastdate))
 626             goto illegal;
 627         if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
 628             goto illegal;
 629         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
 630                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
 631     }
 632     else {
 633         if (!alldigits(mydateexp))
 634             goto illegal;
 635         if(match_exact == 1) {
 636             return (strcmp(datestamp, mydateexp) == 0);
 637         }
 638         else {
 639             return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
 640         }
 641     }
 642 illegal:
 643         error(_("Illegal datestamp expression %s"),dateexp);
 644         /*NOTREACHED*/
 645 }
 646
 647
 648 int
 649 match_level(
 650     const char *        levelexp,
 651     const char *        level)
 652 {
 653     char *dash;
 654     size_t len, len_suffix;
 655     size_t len_prefix;
 656     char lowend[100], highend[100];
 657     char mylevelexp[100];
 658     int match_exact;
 659
 660     if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
 661         error(_("Illegal level expression %s"),levelexp);
 662         /*NOTREACHED*/
 663     }
 664
 665     if(levelexp[0] == '^') {
 666         strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
 667         mylevelexp[strlen(levelexp)-1] = '\0';
 668     }
 669     else {
 670         strncpy(mylevelexp, levelexp, strlen(levelexp));
 671         mylevelexp[strlen(levelexp)] = '\0';
 672     }
 673
 674     if(mylevelexp[strlen(mylevelexp)] == '$') {
 675         match_exact = 1;
 676         mylevelexp[strlen(mylevelexp)] = '\0';
 677     }
 678     else
 679         match_exact = 0;
 680
 681     if((dash = strchr(mylevelexp,'-'))) {
 682         if(match_exact == 1) {
 683             error(_("Illegal level expression %s"),levelexp);
 684             /*NOTREACHED*/
 685         }
 686         len = (size_t)(dash - mylevelexp);
 687         len_suffix = strlen(dash) - 1;
 688         len_prefix = len - len_suffix;
 689
 690         dash++;
 691         strncpy(lowend, mylevelexp, len);
 692         lowend[len] = '\0';
 693         strncpy(highend, mylevelexp, len_prefix);
 694         strncpy(&(highend[len_prefix]), dash, len_suffix);
 695         highend[len] = '\0';
 696         return ((strncmp(level, lowend, strlen(lowend)) >= 0) &&
 697                 (strncmp(level, highend , strlen(highend))  <= 0));
 698     }
 699     else {
 700         if(match_exact == 1) {
 701             return (strcmp(level, mylevelexp) == 0);
 702         }
 703         else {
 704             return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0);
 705         }
 706     }
 707 }