git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * All Rights Reserved.
   5  *
   6  * Permission to use, copy, modify, distribute, and sell this software and its
   7  * documentation for any purpose is hereby granted without fee, provided that
   8  * the above copyright notice appear in all copies and that both that
   9  * copyright notice and this permission notice appear in supporting
  10  * documentation, and that the name of U.M. not be used in advertising or
  11  * publicity pertaining to distribution of the software without specific,
  12  * written prior permission.  U.M. makes no representations about the
  13  * suitability of this software for any purpose.  It is provided "as is"
  14  * without express or implied warranty.
  15  *
  16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22  *
  23  * Authors: the Amanda Development Team.  Its members are listed in a
  24  * file named AUTHORS, in the root directory of this distribution.
  25  */
  26
  27 /*
  28  * See match.h for function prototypes and further explanations.
  29  */
  30
  31 #include "amanda.h"
  32 #include "match.h"
  33 #include <regex.h>
  34
  35 /*
  36  * DATA STRUCTURES, MACROS, STATIC DATA
  37  */
  38
  39 /*
  40  * Return codes used by try_match()
  41  */
  42
  43 #define MATCH_OK (1)
  44 #define MATCH_NONE (0)
  45 #define MATCH_ERROR (-1)
  46
  47 /*
  48  * Macro to tell whether a character is a regex metacharacter. Note that '*'
  49  * and '?' are NOT included: they are themselves special in globs.
  50  */
  51
  52 #define IS_REGEX_META(c) ( \
  53     (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
  54     (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
  55 )
  56
/*
 * Fixed-size character buffer type (STR_SIZE bytes) holding the error message
 * written by regerror() when compiling or executing a regex fails. Functions
 * in this file receive a pointer to such a buffer and pass sizeof(*errbuf) as
 * the buffer size. STR_SIZE is not defined in this file (presumably it comes
 * from amanda.h).
 */

typedef char regex_errbuf[STR_SIZE];
  63
  64 /*
  65  * Structure used by amglob_to_regex() to expand particular glob characters. Its
  66  * fields are:
  67  * - question_mark: what the question mark ('?') should be replaced with;
  68  * - star: what the star ('*') should be replaced with;
  69  * - double_star: what two consecutive stars should be replaced with.
  70  *
  71  * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL.
  72  */
  73
  74 struct subst_table {
  75     const char *question_mark;
  76     const char *star;
  77     const char *double_star;
  78 };
  79
  80 /*
  81  * Susbtitution data for glob_to_regex()
  82  */
  83
  84 static struct subst_table glob_subst_stable = {
  85     "[^/]", /* question_mark */
  86     "[^/]*", /* star */
  87     NULL /* double_star */
  88 };
  89
  90 /*
  91  * Substitution data for tar_to_regex()
  92  */
  93
  94 static struct subst_table tar_subst_stable = {
  95     "[^/]", /* question_mark */
  96     ".*", /* star */
  97     NULL /* double_star */
  98 };
  99
 100 /*
 101  * Substitution data for match_word(): dot
 102  */
 103
 104 static struct subst_table mword_dot_subst_table = {
 105     "[^.]", /* question_mark */
 106     "[^.]*", /* star */
 107     ".*" /* double_star */
 108 };
 109
 110 /*
 111  * Substitution data for match_word(): slash
 112  */
 113
 114 static struct subst_table mword_slash_subst_table = {
 115     "[^/]", /* question_mark */
 116     "[^/]*", /* star */
 117     ".*" /* double_star */
 118 };
 119
 120 /*
 121  * match_word() specific data:
 122  * - re_double_sep: anchored regex matching two separators;
 123  * - re_separator: regex matching the separator;
 124  * - re_begin_full: regex matching the separator, anchored at the beginning;
 125  * - re_end_full: regex matching the separator, andchored at the end.
 126  */
 127
 128 struct mword_regexes {
 129     const char *re_double_sep;
 130     const char *re_begin_full;
 131     const char *re_separator;
 132     const char *re_end_full;
 133 };
 134
 135 static struct mword_regexes mword_dot_regexes = {
 136     "^\\.\\.$", /* re_double_sep */
 137     "^\\.", /* re_begin_full */
 138     "\\.", /* re_separator */
 139     "\\.$" /* re_end_full */
 140 };
 141
 142 static struct mword_regexes mword_slash_regexes = {
 143     "^\\/\\/$", /* re_double_sep */
 144     "^\\/", /* re_begin_full */
 145     "\\/", /* re_separator */
 146     "\\/$" /* re_end_full */
 147 };
 148
 149 /*
 150  * Regular expression caches, and a static mutex to protect initialization and
 151  * access. This may be unnecessarily coarse, but it is unknown at this time
 152  * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may
 153  * be called from within threads, so play it safe.
 154  */
 155
 156 static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
 157 static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL;
 158
 159 /*
 160  * REGEX FUNCTIONS
 161  */
 162
 163 /*
 164  * Initialize regex caches. NOTE: this function MUST be called with
 165  * re_cache_mutex LOCKED, see get_regex_from_cache()
 166  */
 167
 168 static void init_regex_caches(void)
 169 {
 170     static gboolean initialized = FALSE;
 171
 172     if (initialized)
 173         return;
 174
 175     regex_cache = g_hash_table_new(g_str_hash, g_str_equal);
 176     regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal);
 177
 178     initialized = TRUE;
 179 }
 180
 181 /*
 182  * Cleanup a regular expression by escaping all non alphanumeric characters, and
 183  * append beginning/end anchors if need be
 184  */
 185
 186 char *clean_regex(const char *str, gboolean anchor)
 187 {
 188     const char *src;
 189     char *result, *dst;
 190
 191     result = g_malloc(2 * strlen(str) + 3);
 192     dst = result;
 193
 194     if (anchor)
 195         *dst++ = '^';
 196
 197     for (src = str; *src; src++) {
 198         if (!g_ascii_isalnum((int) *src))
 199             *dst++ = '\\';
 200         *dst++ = *src;
 201     }
 202
 203     if (anchor)
 204         *dst++ = '$';
 205
 206     *dst = '\0';
 207     return result;
 208 }
 209
 210 /*
 211  * Compile one regular expression. Return TRUE if the regex has been compiled
 212  * successfully. Otherwise, return FALSE and copy the error message into the
 213  * supplied regex_errbuf pointer. Also, we want to know whether flags should
 214  * include REG_NEWLINE (See regcomp(3) for details). Since this is the more
 215  * frequent case, add REG_NEWLINE to the default flags, and remove it only if
 216  * match_newline is set to FALSE.
 217  */
 218
 219 static gboolean do_regex_compile(const char *str, regex_t *regex,
 220     regex_errbuf *errbuf, gboolean match_newline)
 221 {
 222     int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
 223     int result;
 224
 225     if (!match_newline)
 226         flags &= ~REG_NEWLINE;
 227
 228     result = regcomp(regex, str, flags);
 229
 230     if (!result)
 231         return TRUE;
 232
 233     regerror(result, regex, *errbuf, sizeof(*errbuf));
 234     return FALSE;
 235 }
 236
 237 /*
 238  * Get an already compiled buffer from the regex cache. If the regex is not in
 239  * the cache, allocate a new one and compile it using do_regex_compile(). If the
 240  * compile fails, call regfree() on the object and return NULL to the caller. If
 241  * it does succeed, put the regex buffer in cache and return a pointer to it.
 242  */
 243
 244 static regex_t *get_regex_from_cache(const char *re_str, regex_errbuf *errbuf,
 245     gboolean match_newline)
 246 {
 247     regex_t *ret;
 248     GHashTable *cache;
 249
 250     g_static_mutex_lock(&re_cache_mutex);
 251
 252     init_regex_caches();
 253
 254     cache = (match_newline) ? regex_cache_newline: regex_cache;
 255     ret = g_hash_table_lookup(cache, re_str);
 256
 257     if (ret)
 258         goto out;
 259
 260     ret = g_new(regex_t, 1);
 261
 262     if (do_regex_compile(re_str, ret, errbuf, match_newline)) {
 263         g_hash_table_insert(cache, g_strdup(re_str), ret);
 264         goto out;
 265     }
 266
 267     regfree(ret);
 268     g_free(ret);
 269     ret = NULL;
 270
 271 out:
 272     g_static_mutex_unlock(&re_cache_mutex);
 273     return ret;
 274 }
 275
 276 /*
 277  * Validate one regular expression using do_regex_compile(), and return NULL if
 278  * the regex is valid, or the error message otherwise.
 279  */
 280
 281 char *validate_regexp(const char *regex)
 282 {
 283     regex_t regc;
 284     static regex_errbuf errmsg;
 285     gboolean valid;
 286
 287     valid = do_regex_compile(regex, &regc, &errmsg, TRUE);
 288
 289     regfree(&regc);
 290     return (valid) ? NULL : errmsg;
 291 }
 292
 293 /*
 294  * See if a string matches a compiled regular expression. Return one of MATCH_*
 295  * defined above. If, for some reason, regexec() returns something other than
 296  * not 0 or REG_NOMATCH, return MATCH_ERROR and print the error message in the
 297  * supplied regex_errbuf.
 298  */
 299
 300 static int try_match(regex_t *regex, const char *str,
 301     regex_errbuf *errbuf)
 302 {
 303     int result = regexec(regex, str, 0, 0, 0);
 304
 305     switch(result) {
 306         case 0:
 307             return MATCH_OK;
 308         case REG_NOMATCH:
 309             return MATCH_NONE;
 310         /* Fall through: something went really wrong */
 311     }
 312
 313     regerror(result, regex, *errbuf, sizeof(*errbuf));
 314     return MATCH_ERROR;
 315 }
 316
 317 /*
 318  * Try and match a string against a regular expression, using
 319  * do_regex_compile() and try_match(). Exit early if the regex didn't compile
 320  * or there was an error during matching.
 321  */
 322
 323 int do_match(const char *regex, const char *str, gboolean match_newline)
 324 {
 325     regex_t *re;
 326     int result;
 327     regex_errbuf errmsg;
 328
 329     re = get_regex_from_cache(regex, &errmsg, match_newline);
 330
 331     if (!re)
 332         error("regex \"%s\": %s", regex, errmsg);
 333         /*NOTREACHED*/
 334
 335     result = try_match(re, str, &errmsg);
 336
 337     if (result == MATCH_ERROR)
 338         error("regex \"%s\": %s", regex, errmsg);
 339         /*NOTREACHED*/
 340
 341     return result;
 342 }
 343
 344 /*
 345  * DISK/HOST EXPRESSION HANDLING
 346  */
 347
 348 /*
 349  * Check whether a given character should be escaped (that is, prepended with a
 350  * backslash), EXCEPT for one character.
 351  */
 352
 353 static gboolean should_be_escaped_except(char c, char not_this_one)
 354 {
 355     if (c == not_this_one)
 356         return FALSE;
 357
 358     switch (c) {
 359         case '\\':
 360         case '^':
 361         case '$':
 362         case '?':
 363         case '*':
 364         case '[':
 365         case ']':
 366         case '.':
 367         case '/':
 368             return TRUE;
 369     }
 370
 371     return FALSE;
 372 }
 373
 374 /*
 375  * Take a disk/host expression and turn it into a full-blown amglob (with
 376  * start and end anchors) following rules in amanda-match(7). The not_this_one
 377  * argument represents a character which is NOT meant to be special in this
 378  * case: '/' for disks and '.' for hosts.
 379  */
 380
 381 static char *full_amglob_from_expression(const char *str, char not_this_one)
 382 {
 383     const char *src;
 384     char *result, *dst;
 385
 386     result = g_malloc(2 * strlen(str) + 3);
 387     dst = result;
 388
 389     *dst++ = '^';
 390
 391     for (src = str; *src; src++) {
 392         if (should_be_escaped_except(*src, not_this_one))
 393             *dst++ = '\\';
 394         *dst++ = *src;
 395     }
 396
 397     *dst++ = '$';
 398     *dst = '\0';
 399     return result;
 400 }
 401
 402 /*
 403  * Turn a disk/host expression into a regex
 404  */
 405
 406 char *make_exact_disk_expression(const char *disk)
 407 {
 408     return full_amglob_from_expression(disk, '/');
 409 }
 410
 411 char *make_exact_host_expression(const char *host)
 412 {
 413     return full_amglob_from_expression(host, '.');
 414 }
 415
 416 /*
 417  * GLOB HANDLING, as per amanda-match(7)
 418  */
 419
 420 /*
 421  * Turn a glob into a regex.
 422  */
 423
 424 static char *amglob_to_regex(const char *str, const char *begin,
 425     const char *end, struct subst_table *table)
 426 {
 427     const char *src;
 428     char *result, *dst;
 429     char c;
 430     size_t worst_case;
 431     gboolean double_star = (table->double_star != NULL);
 432
 433     /*
 434      * There are two particular cases when building a regex out of a glob:
 435      * character classes (anything inside [...] or [!...] and quotes (anything
 436      * preceded by a backslash). We start with none being true.
 437      */
 438
 439     gboolean in_character_class = FALSE, in_quote = FALSE;
 440
 441     /*
 442      * Allocate enough space for our string. At worst, the allocated space is
 443      * the length of the following:
 444      * - beginning of regex;
 445      * - size of original string multiplied by worst-case expansion;
 446      * - end of regex;
 447      * - final 0.
 448      *
 449      * Calculate the worst case expansion by walking our struct subst_table.
 450      */
 451
 452     worst_case = strlen(table->question_mark);
 453
 454     if (worst_case < strlen(table->star))
 455         worst_case = strlen(table->star);
 456
 457     if (double_star && worst_case < strlen(table->double_star))
 458         worst_case = strlen(table->double_star);
 459
 460     result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1);
 461
 462     /*
 463      * Start by copying the beginning of the regex...
 464      */
 465
 466     dst = g_stpcpy(result, begin);
 467
 468     /*
 469      * ... Now to the meat of it.
 470      */
 471
 472     for (src = str; *src; src++) {
 473         c = *src;
 474
 475         /*
 476          * First, check that we're in a character class: each and every
 477          * character can be copied as is. We only need to be careful is the
 478          * character is a closing bracket: it will end the character class IF
 479          * AND ONLY IF it is not preceded by a backslash.
 480          */
 481
 482         if (in_character_class) {
 483             in_character_class = ((c != ']') || (*(src - 1) == '\\'));
 484             goto straight_copy;
 485         }
 486
 487         /*
 488          * Are we in a quote? If yes, it is really simple: copy the current
 489          * character, close the quote, the end.
 490          */
 491
 492         if (in_quote) {
 493             in_quote = FALSE;
 494             goto straight_copy;
 495         }
 496
 497         /*
 498          * The only thing left to handle now is the "normal" case: we are not in
 499          * a character class nor in a quote.
 500          */
 501
 502         if (c == '\\') {
 503             /*
 504              * Backslash: append it, and open a new quote.
 505              */
 506             in_quote = TRUE;
 507             goto straight_copy;
 508         } else if (c == '[') {
 509             /*
 510              * Opening bracket: the beginning of a character class.
 511              *
 512              * Look ahead the next character: if it's an exclamation mark, then
 513              * this is a complemented character class; append a caret to make
 514              * the result string regex-friendly, and forward one character in
 515              * advance.
 516              */
 517             *dst++ = c;
 518             in_character_class = TRUE;
 519             if (*(src + 1) == '!') {
 520                 *dst++ = '^';
 521                 src++;
 522             }
 523         } else if (IS_REGEX_META(c)) {
 524             /*
 525              * Regex metacharacter (except for ? and *, see below): append a
 526              * backslash, and then the character itself.
 527              */
 528             *dst++ = '\\';
 529             goto straight_copy;
 530         } else if (c == '?')
 531             /*
 532              * Question mark: take the subsitution string out of our subst_table
 533              * and append it to the string.
 534              */
 535             dst = g_stpcpy(dst, table->question_mark);
 536         else if (c == '*') {
 537             /*
 538              * Star: append the subsitution string found in our subst_table.
 539              * However, look forward the next character: if it's yet another
 540              * star, then see if there is a substitution string for the double
 541              * star and append this one instead.
 542              *
 543              * FIXME: this means that two consecutive stars in a glob string
 544              * where there is no substition for double_star can lead to
 545              * exponential regex execution time: consider [^/]*[^/]*.
 546              */
 547             const char *p = table->star;
 548             if (double_star && *(src + 1) == '*') {
 549                 src++;
 550                 p = table->double_star;
 551             }
 552             dst = g_stpcpy(dst, p);
 553         } else {
 554             /*
 555              * Any other character: append each time.
 556              */
 557 straight_copy:
 558             *dst++ = c;
 559         }
 560     }
 561
 562     /*
 563      * Done, now append the end, ONLY if we are not in a quote - a lone
 564      * backslash at the end of a glob is illegal, just leave it as it, it will
 565      * make the regex compile fail.
 566      */
 567
 568     if (!in_quote)
 569         dst = g_stpcpy(dst, end);
 570     /*
 571      * Finalize, return.
 572      */
 573
 574     *dst = '\0';
 575     return result;
 576 }
 577
 578 /*
 579  * File globs
 580  */
 581
 582 char *glob_to_regex(const char *glob)
 583 {
 584     return amglob_to_regex(glob, "^", "$", &glob_subst_stable);
 585 }
 586
 587 int match_glob(const char *glob, const char *str)
 588 {
 589     char *regex;
 590     regex_t *re;
 591     int result;
 592     regex_errbuf errmsg;
 593
 594     regex = glob_to_regex(glob);
 595     re = get_regex_from_cache(regex, &errmsg, TRUE);
 596
 597     if (!re)
 598         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 599         /*NOTREACHED*/
 600
 601     result = try_match(re, str, &errmsg);
 602
 603     if (result == MATCH_ERROR)
 604         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 605         /*NOTREACHED*/
 606
 607     g_free(regex);
 608
 609     return result;
 610 }
 611
 612 char *validate_glob(const char *glob)
 613 {
 614     char *regex, *ret = NULL;
 615     regex_t regc;
 616     static regex_errbuf errmsg;
 617
 618     regex = glob_to_regex(glob);
 619
 620     if (!do_regex_compile(regex, &regc, &errmsg, TRUE))
 621         ret = errmsg;
 622
 623     regfree(&regc);
 624     g_free(regex);
 625     return ret;
 626 }
 627
 628 /*
 629  * Tar globs
 630  */
 631
 632 static char *tar_to_regex(const char *glob)
 633 {
 634     return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable);
 635 }
 636
 637 int match_tar(const char *glob, const char *str)
 638 {
 639     char *regex;
 640     regex_t *re;
 641     int result;
 642     regex_errbuf errmsg;
 643
 644     regex = tar_to_regex(glob);
 645     re = get_regex_from_cache(regex, &errmsg, TRUE);
 646
 647     if (!re)
 648         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 649         /*NOTREACHED*/
 650
 651     result = try_match(re, str, &errmsg);
 652
 653     if (result == MATCH_ERROR)
 654         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 655         /*NOTREACHED*/
 656
 657     g_free(regex);
 658
 659     return result;
 660 }
 661
 662 /*
 663  * DISK/HOST MATCHING
 664  *
 665  * The functions below wrap input strings with separators and attempt to match
 666  * the result. The core of the operation is the match_word() function.
 667  */
 668
 669 /*
 670  * Check whether a glob passed as an argument to match_word() only looks for the
 671  * separator
 672  */
 673
 674 static gboolean glob_is_separator_only(const char *glob, char sep) {
 675     size_t len = strlen(glob);
 676     const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
 677         len3[] = { '^', sep, '$', 0 };
 678
 679     switch (len) {
 680         case 1:
 681             return (*glob == sep);
 682         case 2:
 683             return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2));
 684         case 3:
 685             return g_str_equal(glob, len3);
 686         default:
 687             return FALSE;
 688     }
 689 }
 690
 691 /*
 692  * Given a word and a separator as an argument, wrap the word with separators -
 693  * if need be. For instance, if '/' is the separator, the rules are:
 694  *
 695  * - "" -> "/"
 696  * - "/" -> "//"
 697  * - "//" -> left alone
 698  * - "xxx" -> "/xxx/"
 699  * - "/xxx" -> "/xxx/"
 700  * - "xxx/" -> "/xxx/"
 701  * - "/xxx/" -> left alone
 702  *
 703  * (note that xxx here may contain the separator as well)
 704  *
 705  * Note that the returned string is dynamically allocated: it is up to the
 706  * caller to free it. Note also that the first argument MUST NOT BE NULL.
 707  */
 708
 709 static char *wrap_word(const char *word, const char separator, const char *glob)
 710 {
 711     size_t len = strlen(word);
 712     size_t len_glob = strlen(glob);
 713     char *result, *p;
 714
 715     /*
 716      * We allocate for the worst case, which is two bytes more than the input
 717      * (have to prepend and append a separator).
 718      */
 719     result = g_malloc(len + 3);
 720     p = result;
 721
 722     /*
 723      * Zero-length: separator only
 724      */
 725
 726     if (len == 0) {
 727         *p++ = separator;
 728         goto out;
 729     }
 730
 731     /*
 732      * Length is one: if the only character is the separator only, the result
 733      * string is two separators
 734      */
 735
 736     if (len == 1 && word[0] == separator) {
 737         *p++ = separator;
 738         *p++ = separator;
 739         goto out;
 740     }
 741
 742     /*
 743      * Otherwise: prepend the separator if needed, append the separator if
 744      * needed.
 745      */
 746
 747     if (word[0] != separator && glob[0] != '^')
 748         *p++ = separator;
 749
 750     p = g_stpcpy(p, word);
 751
 752     if (word[len - 1] != separator && glob[len_glob-1] != '$')
 753         *p++ = separator;
 754
 755 out:
 756     *p++ = '\0';
 757     return result;
 758 }
 759
 760 static int match_word(const char *glob, const char *word, const char separator)
 761 {
 762     char *wrapped_word = wrap_word(word, separator, glob);
 763     struct mword_regexes *regexes = &mword_slash_regexes;
 764     struct subst_table *table = &mword_slash_subst_table;
 765     gboolean not_slash = (separator != '/');
 766     int ret;
 767
 768     /*
 769      * We only expect two separators: '/' or '.'. If it's not '/', it has to be
 770      * the other one...
 771      */
 772     if (not_slash) {
 773         regexes = &mword_dot_regexes;
 774         table = &mword_dot_subst_table;
 775     }
 776
 777     if(glob_is_separator_only(glob, separator)) {
 778         ret = do_match(regexes->re_double_sep, wrapped_word, TRUE);
 779         goto out;
 780     } else {
 781         /*
 782          * Unlike what happens for tar and disk expressions, we need to
 783          * calculate the beginning and end of our regex before calling
 784          * amglob_to_regex().
 785          */
 786
 787         const char *begin, *end;
 788         char *glob_copy = g_strdup(glob);
 789         char *p, *g = glob_copy;
 790         char *regex;
 791
 792         /*
 793          * Calculate the beginning of the regex:
 794          * - by default, it is an unanchored separator;
 795          * - if the glob begins with a caret, make that an anchored separator,
 796          *   and increment g appropriately;
 797          * - if it begins with a separator, make it the empty string.
 798          */
 799
 800         p = glob_copy;
 801         begin = regexes->re_separator;
 802
 803         if (*p == '^') {
 804             begin = "^";
 805             p++, g++;
 806             if (*p == separator) {
 807                 begin = regexes->re_begin_full;
 808                 g++;
 809             }
 810         } else if (*p == separator)
 811             begin = "";
 812
 813         /*
 814          * Calculate the end of the regex:
 815          * - an unanchored separator by default;
 816          * - if the last character is a backslash or the separator itself, it
 817          *   should be the empty string;
 818          * - if it is a dollar sign, overwrite it with 0 and look at the
 819          *   character before it: if it is the separator, only anchor at the
 820          *   end, otherwise, add a separator before the anchor.
 821          */
 822
 823         p = &(glob_copy[strlen(glob_copy) - 1]);
 824         end = regexes->re_separator;
 825         if (*p == '\\' || *p == separator) {
 826             end = "";
 827         } else if (*p == '$') {
 828             char prev = *(p - 1);
 829             *p = '\0';
 830             if (prev == separator) {
 831                 *(p-1) = '\0';
 832                 if (p-2 >= glob_copy) {
 833                     prev = *(p - 2);
 834                     if (prev == '\\') {
 835                         *(p-2) = '\0';
 836                     }
 837                 }
 838                 end = regexes->re_end_full;
 839             } else {
 840                 end = "$";
 841             }
 842         }
 843
 844         regex = amglob_to_regex(g, begin, end, table);
 845         ret = do_match(regex, wrapped_word, TRUE);
 846
 847         g_free(glob_copy);
 848         g_free(regex);
 849     }
 850
 851 out:
 852     g_free(wrapped_word);
 853     return ret;
 854 }
 855
 856 /*
 857  * Match a host expression
 858  */
 859
 860 int match_host(const char *glob, const char *host)
 861 {
 862     char *lglob, *lhost;
 863     int ret;
 864
 865     lglob = g_ascii_strdown(glob, -1);
 866     lhost = g_ascii_strdown(host, -1);
 867
 868     ret = match_word(lglob, lhost, '.');
 869
 870     g_free(lglob);
 871     g_free(lhost);
 872     return ret;
 873 }
 874
 875 /*
 876  * Match a disk expression. Not as straightforward, since Windows paths must be
 877  * accounted for.
 878  */
 879
 880 /*
 881  * Convert a disk and glob from Windows expressed paths (backslashes) into Unix
 882  * paths (slashes).
 883  *
 884  * Note: the resulting string is dynamically allocated, it is up to the caller
 885  * to free it.
 886  *
 887  * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
 888  */
 889
 890 static char *convert_unc_to_unix(const char *unc)
 891 {
 892     char *result = g_strdup(unc);
 893     return g_strdelimit(result, "\\", '/');
 894 }
 895
 896 static char *convert_winglob_to_unix(const char *glob)
 897 {
 898     const char *src;
 899     char *result, *dst;
 900     result = g_malloc(strlen(glob) + 1);
 901     dst = result;
 902
 903     for (src = glob; *src; src++) {
 904         if (*src == '\\' && *(src + 1) == '\\') {
 905             *dst++ = '/';
 906             src++;
 907             continue;
 908         }
 909         *dst++ = *src;
 910     }
 911     *dst = '\0';
 912     return result;
 913 }
 914
 915 /*
 916  * Match a disk expression
 917  */
 918
 919 int match_disk(const char *glob, const char *disk)
 920 {
 921     char *glob2 = NULL, *disk2 = NULL;
 922     const char *g = glob, *d = disk;
 923     int result;
 924
 925     /*
 926      * Check whether our disk potentially refers to a Windows share (the first
 927      * two characters are '\' and there is no / in the word at all): if yes,
 928      * build Unix paths instead and pass those as arguments to match_word()
 929      */
 930
 931     gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/'));
 932
 933     if (windows_share) {
 934         glob2 = convert_winglob_to_unix(glob);
 935         disk2 = convert_unc_to_unix(disk);
 936         g = (const char *) glob2;
 937         d = (const char *) disk2;
 938     }
 939
 940     result = match_word(g, d, '/');
 941
 942     /*
 943      * We can g_free(NULL), so this is "safe"
 944      */
 945     g_free(glob2);
 946     g_free(disk2);
 947
 948     return result;
 949 }
 950
 951 /*
 952  * TIMESTAMPS/LEVEL MATCHING
 953  */
 954
 955 static int
 956 alldigits(
 957     const char *str)
 958 {
 959     while (*str) {
 960         if (!isdigit((int)*(str++)))
 961             return 0;
 962     }
 963     return 1;
 964 }
 965
 966 int
 967 match_datestamp(
 968     const char *        dateexp,
 969     const char *        datestamp)
 970 {
 971     char *dash;
 972     size_t len, len_suffix;
 973     size_t len_prefix;
 974     char firstdate[100], lastdate[100];
 975     char mydateexp[100];
 976     int match_exact;
 977
 978     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 979         goto illegal;
 980     }
 981
 982     /* strip and ignore an initial "^" */
 983     if(dateexp[0] == '^') {
 984         strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
 985         mydateexp[sizeof(mydateexp)-1] = '\0';
 986     }
 987     else {
 988         strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
 989         mydateexp[sizeof(mydateexp)-1] = '\0';
 990     }
 991
 992     if(strlen(dateexp) < 1) {
 993         goto illegal;
 994     }
 995
 996     if(mydateexp[strlen(mydateexp)-1] == '$') {
 997         match_exact = 1;
 998         mydateexp[strlen(mydateexp)-1] = '\0';  /* strip the trailing $ */
 999     }
1000     else
1001         match_exact = 0;
1002
1003     /* a single dash represents a date range */
1004     if((dash = strchr(mydateexp,'-'))) {
1005         if(match_exact == 1 || strchr(dash+1, '-')) {
1006             goto illegal;
1007         }
1008
1009         /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
1010
1011         len = (size_t)(dash - mydateexp);   /* length of XXXYYYY */
1012         len_suffix = strlen(dash) - 1;  /* length of ZZZZ */
1013         if (len_suffix > len) goto illegal;
1014         if (len < len_suffix) {
1015             goto illegal;
1016         }
1017         len_prefix = len - len_suffix; /* length of XXX */
1018
1019         dash++;
1020
1021         strncpy(firstdate, mydateexp, len);
1022         firstdate[len] = '\0';
1023         strncpy(lastdate, mydateexp, len_prefix);
1024         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
1025         lastdate[len] = '\0';
1026         if (!alldigits(firstdate) || !alldigits(lastdate))
1027             goto illegal;
1028         if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
1029             goto illegal;
1030         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
1031                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
1032     }
1033     else {
1034         if (!alldigits(mydateexp))
1035             goto illegal;
1036         if(match_exact == 1) {
1037             return (g_str_equal(datestamp, mydateexp));
1038         }
1039         else {
1040             return (g_str_has_prefix(datestamp, mydateexp));
1041         }
1042     }
1043 illegal:
1044         error("Illegal datestamp expression %s", dateexp);
1045         /*NOTREACHED*/
1046 }
1047
1048
/*
 * Match a level against a level expression. Return non-zero if it matches,
 * zero otherwise.
 *
 * The expression may start with '^' (ignored) and may end with '$' (exact
 * match required). Two forms are accepted:
 * - digits only: the level string must start with these digits, or be equal
 *   to them if the expression ended with '$';
 * - LOW-HI: the level, converted with strtol(), must lie within the range,
 *   bounds included. A range cannot be combined with '$'.
 *
 * An expression which is empty (also once the leading '^' is removed), 100
 * characters or longer, or otherwise malformed is reported through error().
 */
int
match_level(
    const char *        levelexp,
    const char *        level)
{
    char *dash;
    long int low, hi, level_i;
    char mylevelexp[100];
    int match_exact;

    if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
        goto illegal;
    }

    /* strip and ignore an initial "^" */
    if(levelexp[0] == '^') {
        strncpy(mylevelexp, levelexp+1, sizeof(mylevelexp)-1);
    }
    else {
        strncpy(mylevelexp, levelexp, sizeof(mylevelexp)-1);
    }
    mylevelexp[sizeof(mylevelexp)-1] = '\0';

    /*
     * The expression may have been a lone "^": nothing is left then, and the
     * last-character test below would index before the buffer.
     */
    if(strlen(mylevelexp) < 1) {
        goto illegal;
    }

    if(mylevelexp[strlen(mylevelexp)-1] == '$') {
        match_exact = 1;
        mylevelexp[strlen(mylevelexp)-1] = '\0';  /* strip the trailing $ */
    }
    else
        match_exact = 0;

    /* a dash represents a level range */
    if((dash = strchr(mylevelexp,'-'))) {
        if(match_exact == 1) {
            goto illegal;
        }

        /* split the expression in two at the dash */
        *dash = '\0';
        if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;

        errno = 0;
        low = strtol(mylevelexp, (char **) NULL, 10);
        if (errno) goto illegal;
        hi = strtol(dash+1, (char **) NULL, 10);
        if (errno) goto illegal;
        level_i = strtol(level, (char **) NULL, 10);
        if (errno) goto illegal;

        return ((level_i >= low) && (level_i <= hi));
    }
    else {
        if (!alldigits(mylevelexp)) goto illegal;
        if(match_exact == 1) {
            return (g_str_equal(level, mylevelexp));
        }
        else {
            return (g_str_has_prefix(level, mylevelexp));
        }
    }
illegal:
    error("Illegal level expression %s", levelexp);
    /*NOTREACHED*/
}