git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * All Rights Reserved.
   5  *
   6  * Permission to use, copy, modify, distribute, and sell this software and its
   7  * documentation for any purpose is hereby granted without fee, provided that
   8  * the above copyright notice appear in all copies and that both that
   9  * copyright notice and this permission notice appear in supporting
  10  * documentation, and that the name of U.M. not be used in advertising or
  11  * publicity pertaining to distribution of the software without specific,
  12  * written prior permission.  U.M. makes no representations about the
  13  * suitability of this software for any purpose.  It is provided "as is"
  14  * without express or implied warranty.
  15  *
  16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22  *
  23  * Authors: the Amanda Development Team.  Its members are listed in a
  24  * file named AUTHORS, in the root directory of this distribution.
  25  */
  26 /*
  27  * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $
  28  *
  29  * functions for checking and matching regular expressions
  30  */
  31
  32 #include "amanda.h"
  33 #include <regex.h>
  34
  35 static int match_word(const char *glob, const char *word, const char separator);
  36
  37 char *
  38 validate_regexp(
  39     const char *        regex)
  40 {
  41     regex_t regc;
  42     int result;
  43     static char errmsg[STR_SIZE];
  44
  45     if ((result = regcomp(&regc, regex,
  46                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  47       regerror(result, &regc, errmsg, SIZEOF(errmsg));
  48       return errmsg;
  49     }
  50
  51     regfree(&regc);
  52
  53     return NULL;
  54 }
  55
  56 char *
  57 clean_regex(
  58     const char *        regex)
  59 {
  60     char *result;
  61     int j;
  62     size_t i;
  63     result = alloc(2*strlen(regex)+1);
  64
  65     for(i=0,j=0;i<strlen(regex);i++) {
  66         if(!isalnum((int)regex[i]))
  67             result[j++]='\\';
  68         result[j++]=regex[i];
  69     }
  70     result[j] = '\0';
  71     return result;
  72 }
  73
  74 int
  75 match(
  76     const char *        regex,
  77     const char *        str)
  78 {
  79     regex_t regc;
  80     int result;
  81     char errmsg[STR_SIZE];
  82
  83     if((result = regcomp(&regc, regex,
  84                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
  85         regerror(result, &regc, errmsg, SIZEOF(errmsg));
  86         error("regex \"%s\": %s", regex, errmsg);
  87         /*NOTREACHED*/
  88     }
  89
  90     if((result = regexec(&regc, str, 0, 0, 0)) != 0
  91        && result != REG_NOMATCH) {
  92         regerror(result, &regc, errmsg, SIZEOF(errmsg));
  93         error("regex \"%s\": %s", regex, errmsg);
  94         /*NOTREACHED*/
  95     }
  96
  97     regfree(&regc);
  98
  99     return result == 0;
 100 }
 101
 102 char *
 103 validate_glob(
 104     const char *        glob)
 105 {
 106     char *regex;
 107     regex_t regc;
 108     int result;
 109     static char errmsg[STR_SIZE];
 110
 111     regex = glob_to_regex(glob);
 112     if ((result = regcomp(&regc, regex,
 113                           REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 114       regerror(result, &regc, errmsg, SIZEOF(errmsg));
 115       amfree(regex);
 116       return errmsg;
 117     }
 118
 119     regfree(&regc);
 120     amfree(regex);
 121
 122     return NULL;
 123 }
 124
 125 int
 126 match_glob(
 127     const char *        glob,
 128     const char *        str)
 129 {
 130     char *regex;
 131     regex_t regc;
 132     int result;
 133     char errmsg[STR_SIZE];
 134
 135     regex = glob_to_regex(glob);
 136     if((result = regcomp(&regc, regex,
 137                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 138         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 139         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 140         /*NOTREACHED*/
 141     }
 142
 143     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 144        && result != REG_NOMATCH) {
 145         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 146         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 147         /*NOTREACHED*/
 148     }
 149
 150     regfree(&regc);
 151     amfree(regex);
 152
 153     return result == 0;
 154 }
 155
 156 char *
 157 glob_to_regex(
 158     const char *        glob)
 159 {
 160     char *regex;
 161     char *r;
 162     size_t len;
 163     int ch;
 164     int last_ch;
 165
 166     /*
 167      * Allocate an area to convert into.  The worst case is a five to
 168      * one expansion.
 169      */
 170     len = strlen(glob);
 171     regex = alloc(1 + len * 5 + 1 + 1);
 172
 173     /*
 174      * Do the conversion:
 175      *
 176      *  ?      -> [^/]
 177      *  *      -> [^/]*
 178      *  [!...] -> [^...]
 179      *
 180      * The following are given a leading backslash to protect them
 181      * unless they already have a backslash:
 182      *
 183      *   ( ) { } + . ^ $ |
 184      *
 185      * Put a leading ^ and trailing $ around the result.  If the last
 186      * non-escaped character is \ leave the $ off to cause a syntax
 187      * error when the regex is compiled.
 188      */
 189
 190     r = regex;
 191     *r++ = '^';
 192     last_ch = '\0';
 193     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 194         if (last_ch == '\\') {
 195             *r++ = (char)ch;
 196             ch = '\0';                  /* so last_ch != '\\' next time */
 197         } else if (last_ch == '[' && ch == '!') {
 198             *r++ = '^';
 199         } else if (ch == '\\') {
 200             *r++ = (char)ch;
 201         } else if (ch == '*' || ch == '?') {
 202             *r++ = '[';
 203             *r++ = '^';
 204             *r++ = '/';
 205             *r++ = ']';
 206             if (ch == '*') {
 207                 *r++ = '*';
 208             }
 209         } else if (ch == '('
 210                    || ch == ')'
 211                    || ch == '{'
 212                    || ch == '}'
 213                    || ch == '+'
 214                    || ch == '.'
 215                    || ch == '^'
 216                    || ch == '$'
 217                    || ch == '|') {
 218             *r++ = '\\';
 219             *r++ = (char)ch;
 220         } else {
 221             *r++ = (char)ch;
 222         }
 223     }
 224     if (last_ch != '\\') {
 225         *r++ = '$';
 226     }
 227     *r = '\0';
 228
 229     return regex;
 230 }
 231
 232
 233 int
 234 match_tar(
 235     const char *        glob,
 236     const char *        str)
 237 {
 238     char *regex;
 239     regex_t regc;
 240     int result;
 241     char errmsg[STR_SIZE];
 242
 243     regex = tar_to_regex(glob);
 244     if((result = regcomp(&regc, regex,
 245                          REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
 246         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 247         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 248         /*NOTREACHED*/
 249     }
 250
 251     if((result = regexec(&regc, str, 0, 0, 0)) != 0
 252        && result != REG_NOMATCH) {
 253         regerror(result, &regc, errmsg, SIZEOF(errmsg));
 254         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 255         /*NOTREACHED*/
 256     }
 257
 258     regfree(&regc);
 259     amfree(regex);
 260
 261     return result == 0;
 262 }
 263
 264 char *
 265 tar_to_regex(
 266     const char *        glob)
 267 {
 268     char *regex;
 269     char *r;
 270     size_t len;
 271     int ch;
 272     int last_ch;
 273
 274     /*
 275      * Allocate an area to convert into.  The worst case is a five to
 276      * one expansion.
 277      */
 278     len = strlen(glob);
 279     regex = alloc(1 + len * 5 + 1 + 1);
 280
 281     /*
 282      * Do the conversion:
 283      *
 284      *  ?      -> [^/]
 285      *  *      -> .*
 286      *  [!...] -> [^...]
 287      *
 288      * The following are given a leading backslash to protect them
 289      * unless they already have a backslash:
 290      *
 291      *   ( ) { } + . ^ $ |
 292      *
 293      * Put a leading ^ and trailing $ around the result.  If the last
 294      * non-escaped character is \ leave the $ off to cause a syntax
 295      * error when the regex is compiled.
 296      */
 297
 298     r = regex;
 299     *r++ = '^';
 300     last_ch = '\0';
 301     for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
 302         if (last_ch == '\\') {
 303             *r++ = (char)ch;
 304             ch = '\0';                  /* so last_ch != '\\' next time */
 305         } else if (last_ch == '[' && ch == '!') {
 306             *r++ = '^';
 307         } else if (ch == '\\') {
 308             *r++ = (char)ch;
 309         } else if (ch == '*') {
 310             *r++ = '.';
 311             *r++ = '*';
 312         } else if (ch == '?') {
 313             *r++ = '[';
 314             *r++ = '^';
 315             *r++ = '/';
 316             *r++ = ']';
 317         } else if (ch == '('
 318                    || ch == ')'
 319                    || ch == '{'
 320                    || ch == '}'
 321                    || ch == '+'
 322                    || ch == '.'
 323                    || ch == '^'
 324                    || ch == '$'
 325                    || ch == '|') {
 326             *r++ = '\\';
 327             *r++ = (char)ch;
 328         } else {
 329             *r++ = (char)ch;
 330         }
 331     }
 332     if (last_ch != '\\') {
 333         *r++ = '$';
 334     }
 335     *r = '\0';
 336
 337     return regex;
 338 }
 339
 340
 341 static int
 342 match_word(
 343     const char *        glob,
 344     const char *        word,
 345     const char          separator)
 346 {
 347     char *regex;
 348     char *r;
 349     size_t  len;
 350     int  ch;
 351     int  last_ch;
 352     int  next_ch;
 353     size_t  lenword;
 354     char *nword;
 355     char *nglob;
 356     char *g;
 357     const char *w;
 358     int  i;
 359
 360     lenword = strlen(word);
 361     nword = (char *)alloc(lenword + 3);
 362
 363     r = nword;
 364     w = word;
 365     if(lenword == 1 && *w == separator) {
 366         *r++ = separator;
 367         *r++ = separator;
 368     }
 369     else {
 370         if(*w != separator)
 371             *r++ = separator;
 372         while(*w != '\0')
 373             *r++ = *w++;
 374         if(*(r-1) != separator)
 375             *r++ = separator;
 376     }
 377     *r = '\0';
 378
 379     /*
 380      * Allocate an area to convert into.  The worst case is a six to
 381      * one expansion.
 382      */
 383     len = strlen(glob);
 384     regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2);
 385     r = regex;
 386     nglob = stralloc(glob);
 387     g = nglob;
 388
 389     if((len == 1 && nglob[0] == separator) ||
 390        (len == 2 && nglob[0] == '^' && nglob[1] == separator) ||
 391        (len == 2 && nglob[0] == separator && nglob[1] == '$') ||
 392        (len == 3 && nglob[0] == '^' && nglob[1] == separator &&
 393         nglob[2] == '$')) {
 394         *r++ = '^';
 395         *r++ = '\\';
 396         *r++ = separator;
 397         *r++ = '\\';
 398         *r++ = separator;
 399         *r++ = '$';
 400     }
 401     else {
 402         /*
 403          * Do the conversion:
 404          *
 405          *  ?      -> [^\separator]
 406          *  *      -> [^\separator]*
 407          *  [!...] -> [^...]
 408          *  **     -> .*
 409          *
 410          * The following are given a leading backslash to protect them
 411          * unless they already have a backslash:
 412          *
 413          *   ( ) { } + . ^ $ |
 414          *
 415          * If the last
 416          * non-escaped character is \ leave it to cause a syntax
 417          * error when the regex is compiled.
 418          */
 419
 420         if(*g == '^') {
 421             *r++ = '^';
 422             *r++ = '\\';        /* escape the separator */
 423             *r++ = separator;
 424             g++;
 425             if(*g == separator) g++;
 426         }
 427         else if(*g != separator) {
 428             *r++ = '\\';        /* add a leading \separator */
 429             *r++ = separator;
 430         }
 431         last_ch = '\0';
 432         for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) {
 433             next_ch = *g;
 434             if (last_ch == '\\') {
 435                 *r++ = (char)ch;
 436                 ch = '\0';              /* so last_ch != '\\' next time */
 437             } else if (last_ch == '[' && ch == '!') {
 438                 *r++ = '^';
 439             } else if (ch == '\\') {
 440                 *r++ = (char)ch;
 441             } else if (ch == '*' || ch == '?') {
 442                 if(ch == '*' && next_ch == '*') {
 443                     *r++ = '.';
 444                     g++;
 445                 }
 446                 else {
 447                     *r++ = '[';
 448                     *r++ = '^';
 449                     *r++ = '\\';
 450                     *r++ = separator;
 451                     *r++ = ']';
 452                 }
 453                 if (ch == '*') {
 454                     *r++ = '*';
 455                 }
 456             } else if (ch == '$' && next_ch == '\0') {
 457                 if(last_ch != separator) {
 458                     *r++ = '\\';
 459                     *r++ = separator;
 460                 }
 461                 *r++ = (char)ch;
 462             } else if (   ch == '('
 463                        || ch == ')'
 464                        || ch == '{'
 465                        || ch == '}'
 466                        || ch == '+'
 467                        || ch == '.'
 468                        || ch == '^'
 469                        || ch == '$'
 470                        || ch == '|') {
 471                 *r++ = '\\';
 472                 *r++ = (char)ch;
 473             } else {
 474                 *r++ = (char)ch;
 475             }
 476         }
 477         if(last_ch != '\\') {
 478             if(last_ch != separator && last_ch != '$') {
 479                 *r++ = '\\';
 480                 *r++ = separator;               /* add a trailing \separator */
 481             }
 482         }
 483     }
 484     *r = '\0';
 485
 486     i = match(regex,nword);
 487
 488     amfree(nword);
 489     amfree(nglob);
 490     amfree(regex);
 491     return i;
 492 }
 493
 494
 495 int
 496 match_host(
 497     const char *        glob,
 498     const char *        host)
 499 {
 500     char *lglob, *lhost;
 501     char *c;
 502     const char *d;
 503     int i;
 504
 505
 506     lglob = (char *)alloc(strlen(glob)+1);
 507     c = lglob, d=glob;
 508     while( *d != '\0')
 509         *c++ = (char)tolower(*d++);
 510     *c = *d;
 511
 512     lhost = (char *)alloc(strlen(host)+1);
 513     c = lhost, d=host;
 514     while( *d != '\0')
 515         *c++ = (char)tolower(*d++);
 516     *c = *d;
 517
 518     i = match_word(lglob, lhost, (int)'.');
 519     amfree(lglob);
 520     amfree(lhost);
 521     return i;
 522 }
 523
 524
 525 int
 526 match_disk(
 527     const char *        glob,
 528     const char *        disk)
 529 {
 530     return match_word(glob, disk, '/');
 531 }
 532
 533 int
 534 match_datestamp(
 535     const char *        dateexp,
 536     const char *        datestamp)
 537 {
 538     char *dash;
 539     size_t len, len_suffix;
 540     size_t len_prefix;
 541     char firstdate[100], lastdate[100];
 542     char mydateexp[100];
 543     int match_exact;
 544
 545     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 546         error("Illegal datestamp expression %s",dateexp);
 547         /*NOTREACHED*/
 548     }
 549
 550     if(dateexp[0] == '^') {
 551         strncpy(mydateexp, dateexp+1, strlen(dateexp)-1);
 552         mydateexp[strlen(dateexp)-1] = '\0';
 553     }
 554     else {
 555         strncpy(mydateexp, dateexp, strlen(dateexp));
 556         mydateexp[strlen(dateexp)] = '\0';
 557     }
 558
 559     if(mydateexp[strlen(mydateexp)] == '$') {
 560         match_exact = 1;
 561         mydateexp[strlen(mydateexp)] = '\0';
 562     }
 563     else
 564         match_exact = 0;
 565
 566     if((dash = strchr(mydateexp,'-'))) {
 567         if(match_exact == 1) {
 568             error("Illegal datestamp expression %s",dateexp);
 569             /*NOTREACHED*/
 570         }
 571         len = (size_t)(dash - mydateexp);
 572         len_suffix = strlen(dash) - 1;
 573         len_prefix = len - len_suffix;
 574
 575         dash++;
 576         strncpy(firstdate, mydateexp, len);
 577         firstdate[len] = '\0';
 578         strncpy(lastdate, mydateexp, len_prefix);
 579         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
 580         lastdate[len] = '\0';
 581         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
 582                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
 583     }
 584     else {
 585         if(match_exact == 1) {
 586             return (strcmp(datestamp, mydateexp) == 0);
 587         }
 588         else {
 589             return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
 590         }
 591     }
 592 }
 593
 594
 595 int
 596 match_level(
 597     const char *        levelexp,
 598     const char *        level)
 599 {
 600     char *dash;
 601     size_t len, len_suffix;
 602     size_t len_prefix;
 603     char lowend[100], highend[100];
 604     char mylevelexp[100];
 605     int match_exact;
 606
 607     if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
 608         error("Illegal level expression %s",levelexp);
 609         /*NOTREACHED*/
 610     }
 611
 612     if(levelexp[0] == '^') {
 613         strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
 614         mylevelexp[strlen(levelexp)-1] = '\0';
 615     }
 616     else {
 617         strncpy(mylevelexp, levelexp, strlen(levelexp));
 618         mylevelexp[strlen(levelexp)] = '\0';
 619     }
 620
 621     if(mylevelexp[strlen(mylevelexp)] == '$') {
 622         match_exact = 1;
 623         mylevelexp[strlen(mylevelexp)] = '\0';
 624     }
 625     else
 626         match_exact = 0;
 627
 628     if((dash = strchr(mylevelexp,'-'))) {
 629         if(match_exact == 1) {
 630             error("Illegal level expression %s",levelexp);
 631             /*NOTREACHED*/
 632         }
 633         len = (size_t)(dash - mylevelexp);
 634         len_suffix = strlen(dash) - 1;
 635         len_prefix = len - len_suffix;
 636
 637         dash++;
 638         strncpy(lowend, mylevelexp, len);
 639         lowend[len] = '\0';
 640         strncpy(highend, mylevelexp, len_prefix);
 641         strncpy(&(highend[len_prefix]), dash, len_suffix);
 642         highend[len] = '\0';
 643         return ((strncmp(level, lowend, strlen(lowend)) >= 0) &&
 644                 (strncmp(level, highend , strlen(highend))  <= 0));
 645     }
 646     else {
 647         if(match_exact == 1) {
 648             return (strcmp(level, mylevelexp) == 0);
 649         }
 650         else {
 651             return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0);
 652         }
 653     }
 654 }