git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * All Rights Reserved.
   5  *
   6  * Permission to use, copy, modify, distribute, and sell this software and its
   7  * documentation for any purpose is hereby granted without fee, provided that
   8  * the above copyright notice appear in all copies and that both that
   9  * copyright notice and this permission notice appear in supporting
  10  * documentation, and that the name of U.M. not be used in advertising or
  11  * publicity pertaining to distribution of the software without specific,
  12  * written prior permission.  U.M. makes no representations about the
  13  * suitability of this software for any purpose.  It is provided "as is"
  14  * without express or implied warranty.
  15  *
  16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  22  *
  23  * Authors: the Amanda Development Team.  Its members are listed in a
  24  * file named AUTHORS, in the root directory of this distribution.
  25  */
  26
  27 /*
  28  * See match.h for function prototypes and further explanations.
  29  */
  30
  31 #include "amanda.h"
  32 #include "match.h"
  33 #include <regex.h>
  34
  35 /*
  36  * DATA STRUCTURES, MACROS, STATIC DATA
  37  */
  38
/*
 * Return codes used by try_match():
 * - MATCH_OK: the string matched the regex;
 * - MATCH_NONE: the string did not match;
 * - MATCH_ERROR: regexec() reported something other than a match or a
 *   non-match.
 */

#define MATCH_OK (1)
#define MATCH_NONE (0)
#define MATCH_ERROR (-1)
  46
/*
 * Macro to tell whether a character is a regex metacharacter which must be
 * escaped when a glob is turned into a regex. Note that '*' and '?' are NOT
 * included: they are themselves special in globs. Backslash and '[' are not
 * included either: amglob_to_regex() handles them before using this macro.
 *
 * The argument is evaluated several times: do not pass an expression with
 * side effects.
 */

#define IS_REGEX_META(c) ( \
    (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
    (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
)
  56
/*
 * Define a specific type to hold error messages in case regex compile/matching
 * fails. It is a fixed-size character array (STR_SIZE bytes; STR_SIZE is not
 * defined in this file), so functions taking a "regex_errbuf *" can use
 * sizeof(*errbuf) as the buffer length handed to regerror().
 */

typedef char regex_errbuf[STR_SIZE];
  63
/*
 * Structure used by amglob_to_regex() to expand particular glob characters. Its
 * fields are:
 * - question_mark: what the question mark ('?') should be replaced with;
 * - star: what the star ('*') should be replaced with;
 * - double_star: what two consecutive stars should be replaced with.
 *
 * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL:
 * amglob_to_regex() calls strlen() on question_mark and star unconditionally.
 * A NULL double_star means that two consecutive stars are expanded as two
 * single stars.
 */

struct subst_table {
    const char *question_mark;
    const char *star;
    const char *double_star;
};
  79
/*
 * Substitution data for glob_to_regex(): neither '?' nor '*' may match a
 * slash, and there is no special treatment of "**".
 */

static struct subst_table glob_subst_stable = {
    "[^/]", /* question_mark */
    "[^/]*", /* star */
    NULL /* double_star */
};

/*
 * Substitution data for tar_to_regex(): '?' may not match a slash, but '*'
 * matches any sequence of characters, slashes included.
 */

static struct subst_table tar_subst_stable = {
    "[^/]", /* question_mark */
    ".*", /* star */
    NULL /* double_star */
};

/*
 * Substitution data for match_word(): dot. Single wildcards stop at a dot,
 * "**" matches anything.
 */

static struct subst_table mword_dot_subst_table = {
    "[^.]", /* question_mark */
    "[^.]*", /* star */
    ".*" /* double_star */
};

/*
 * Substitution data for match_word(): slash. Single wildcards stop at a
 * slash, "**" matches anything.
 */

static struct subst_table mword_slash_subst_table = {
    "[^/]", /* question_mark */
    "[^/]*", /* star */
    ".*" /* double_star */
};
 119
/*
 * match_word() specific data (fields listed in declaration order):
 * - re_double_sep: anchored regex matching exactly two separators;
 * - re_begin_full: regex matching the separator, anchored at the beginning;
 * - re_separator: regex matching the separator, unanchored;
 * - re_end_full: regex matching the separator, anchored at the end.
 */

struct mword_regexes {
    const char *re_double_sep;
    const char *re_begin_full;
    const char *re_separator;
    const char *re_end_full;
};

/* Regexes used when the separator is a dot */
static struct mword_regexes mword_dot_regexes = {
    "^\\.\\.$", /* re_double_sep */
    "^\\.", /* re_begin_full */
    "\\.", /* re_separator */
    "\\.$" /* re_end_full */
};

/* Regexes used when the separator is a slash */
static struct mword_regexes mword_slash_regexes = {
    "^\\/\\/$", /* re_double_sep */
    "^\\/", /* re_begin_full */
    "\\/", /* re_separator */
    "\\/$" /* re_end_full */
};
 148
/*
 * Regular expression caches, and a static mutex to protect initialization and
 * access. This may be unnecessarily coarse, but it is unknown at this time
 * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may
 * be called from within threads, so play it safe.
 *
 * Both preprocessor branches below define the very same mutex; the only
 * difference is that, for GLib >= 2.31, the definition is wrapped in pragmas
 * silencing GCC's -Wmissing-field-initializers diagnostic.
 *
 * There is one cache per compile mode: regex_cache_newline holds the regexes
 * compiled with match_newline set, regex_cache holds the others (see
 * get_regex_from_cache()).
 */

#if (GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 31))
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
  static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
# pragma GCC diagnostic pop
#else
  static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
#endif
static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL;
 165
 166 /*
 167  * REGEX FUNCTIONS
 168  */
 169
 170 /*
 171  * Initialize regex caches. NOTE: this function MUST be called with
 172  * re_cache_mutex LOCKED, see get_regex_from_cache()
 173  */
 174
 175 static void init_regex_caches(void)
 176 {
 177     static gboolean initialized = FALSE;
 178
 179     if (initialized)
 180         return;
 181
 182     regex_cache = g_hash_table_new(g_str_hash, g_str_equal);
 183     regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal);
 184
 185     initialized = TRUE;
 186 }
 187
 188 /*
 189  * Cleanup a regular expression by escaping all non alphanumeric characters, and
 190  * append beginning/end anchors if need be
 191  */
 192
 193 char *clean_regex(const char *str, gboolean anchor)
 194 {
 195     const char *src;
 196     char *result, *dst;
 197
 198     result = g_malloc(2 * strlen(str) + 3);
 199     dst = result;
 200
 201     if (anchor)
 202         *dst++ = '^';
 203
 204     for (src = str; *src; src++) {
 205         if (!g_ascii_isalnum((int) *src))
 206             *dst++ = '\\';
 207         *dst++ = *src;
 208     }
 209
 210     if (anchor)
 211         *dst++ = '$';
 212
 213     *dst = '\0';
 214     return result;
 215 }
 216
 217 /*
 218  * Compile one regular expression. Return TRUE if the regex has been compiled
 219  * successfully. Otherwise, return FALSE and copy the error message into the
 220  * supplied regex_errbuf pointer. Also, we want to know whether flags should
 221  * include REG_NEWLINE (See regcomp(3) for details). Since this is the more
 222  * frequent case, add REG_NEWLINE to the default flags, and remove it only if
 223  * match_newline is set to FALSE.
 224  */
 225
 226 static gboolean do_regex_compile(const char *str, regex_t *regex,
 227     regex_errbuf *errbuf, gboolean match_newline)
 228 {
 229     int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
 230     int result;
 231
 232     if (!match_newline)
 233         flags &= ~REG_NEWLINE;
 234
 235     result = regcomp(regex, str, flags);
 236
 237     if (!result)
 238         return TRUE;
 239
 240     regerror(result, regex, *errbuf, sizeof(*errbuf));
 241     return FALSE;
 242 }
 243
 244 /*
 245  * Get an already compiled buffer from the regex cache. If the regex is not in
 246  * the cache, allocate a new one and compile it using do_regex_compile(). If the
 247  * compile fails, call regfree() on the object and return NULL to the caller. If
 248  * it does succeed, put the regex buffer in cache and return a pointer to it.
 249  */
 250
 251 static regex_t *get_regex_from_cache(const char *re_str, regex_errbuf *errbuf,
 252     gboolean match_newline)
 253 {
 254     regex_t *ret;
 255     GHashTable *cache;
 256
 257     g_static_mutex_lock(&re_cache_mutex);
 258
 259     init_regex_caches();
 260
 261     cache = (match_newline) ? regex_cache_newline: regex_cache;
 262     ret = g_hash_table_lookup(cache, re_str);
 263
 264     if (ret)
 265         goto out;
 266
 267     ret = g_new(regex_t, 1);
 268
 269     if (do_regex_compile(re_str, ret, errbuf, match_newline)) {
 270         g_hash_table_insert(cache, g_strdup(re_str), ret);
 271         goto out;
 272     }
 273
 274     regfree(ret);
 275     g_free(ret);
 276     ret = NULL;
 277
 278 out:
 279     g_static_mutex_unlock(&re_cache_mutex);
 280     return ret;
 281 }
 282
 283 /*
 284  * Validate one regular expression using do_regex_compile(), and return NULL if
 285  * the regex is valid, or the error message otherwise.
 286  */
 287
 288 char *validate_regexp(const char *regex)
 289 {
 290     regex_t regc;
 291     static regex_errbuf errmsg;
 292     gboolean valid;
 293
 294     valid = do_regex_compile(regex, &regc, &errmsg, TRUE);
 295
 296     regfree(&regc);
 297     return (valid) ? NULL : errmsg;
 298 }
 299
 300 /*
 301  * See if a string matches a compiled regular expression. Return one of MATCH_*
 302  * defined above. If, for some reason, regexec() returns something other than
 303  * not 0 or REG_NOMATCH, return MATCH_ERROR and print the error message in the
 304  * supplied regex_errbuf.
 305  */
 306
 307 static int try_match(regex_t *regex, const char *str,
 308     regex_errbuf *errbuf)
 309 {
 310     int result = regexec(regex, str, 0, 0, 0);
 311
 312     switch(result) {
 313         case 0:
 314             return MATCH_OK;
 315         case REG_NOMATCH:
 316             return MATCH_NONE;
 317         /* Fall through: something went really wrong */
 318     }
 319
 320     regerror(result, regex, *errbuf, sizeof(*errbuf));
 321     return MATCH_ERROR;
 322 }
 323
 324 /*
 325  * Try and match a string against a regular expression, using
 326  * do_regex_compile() and try_match(). Exit early if the regex didn't compile
 327  * or there was an error during matching.
 328  */
 329
 330 int do_match(const char *regex, const char *str, gboolean match_newline)
 331 {
 332     regex_t *re;
 333     int result;
 334     regex_errbuf errmsg;
 335
 336     re = get_regex_from_cache(regex, &errmsg, match_newline);
 337
 338     if (!re)
 339         error("regex \"%s\": %s", regex, errmsg);
 340         /*NOTREACHED*/
 341
 342     result = try_match(re, str, &errmsg);
 343
 344     if (result == MATCH_ERROR)
 345         error("regex \"%s\": %s", regex, errmsg);
 346         /*NOTREACHED*/
 347
 348     return result;
 349 }
 350
 351 /*
 352  * DISK/HOST EXPRESSION HANDLING
 353  */
 354
 355 /*
 356  * Check whether a given character should be escaped (that is, prepended with a
 357  * backslash), EXCEPT for one character.
 358  */
 359
 360 static gboolean should_be_escaped_except(char c, char not_this_one)
 361 {
 362     if (c == not_this_one)
 363         return FALSE;
 364
 365     switch (c) {
 366         case '\\':
 367         case '^':
 368         case '$':
 369         case '?':
 370         case '*':
 371         case '[':
 372         case ']':
 373         case '.':
 374         case '/':
 375             return TRUE;
 376     }
 377
 378     return FALSE;
 379 }
 380
 381 /*
 382  * Take a disk/host expression and turn it into a full-blown amglob (with
 383  * start and end anchors) following rules in amanda-match(7). The not_this_one
 384  * argument represents a character which is NOT meant to be special in this
 385  * case: '/' for disks and '.' for hosts.
 386  */
 387
 388 static char *full_amglob_from_expression(const char *str, char not_this_one)
 389 {
 390     const char *src;
 391     char *result, *dst;
 392
 393     result = g_malloc(2 * strlen(str) + 3);
 394     dst = result;
 395
 396     *dst++ = '^';
 397
 398     for (src = str; *src; src++) {
 399         if (should_be_escaped_except(*src, not_this_one))
 400             *dst++ = '\\';
 401         *dst++ = *src;
 402     }
 403
 404     *dst++ = '$';
 405     *dst = '\0';
 406     return result;
 407 }
 408
 409 /*
 410  * Turn a disk/host expression into a regex
 411  */
 412
 413 char *make_exact_disk_expression(const char *disk)
 414 {
 415     return full_amglob_from_expression(disk, '/');
 416 }
 417
 418 char *make_exact_host_expression(const char *host)
 419 {
 420     return full_amglob_from_expression(host, '.');
 421 }
 422
 423 /*
 424  * GLOB HANDLING, as per amanda-match(7)
 425  */
 426
 427 /*
 428  * Turn a glob into a regex.
 429  */
 430
 431 static char *amglob_to_regex(const char *str, const char *begin,
 432     const char *end, struct subst_table *table)
 433 {
 434     const char *src;
 435     char *result, *dst;
 436     char c;
 437     size_t worst_case;
 438     gboolean double_star = (table->double_star != NULL);
 439
 440     /*
 441      * There are two particular cases when building a regex out of a glob:
 442      * character classes (anything inside [...] or [!...] and quotes (anything
 443      * preceded by a backslash). We start with none being true.
 444      */
 445
 446     gboolean in_character_class = FALSE, in_quote = FALSE;
 447
 448     /*
 449      * Allocate enough space for our string. At worst, the allocated space is
 450      * the length of the following:
 451      * - beginning of regex;
 452      * - size of original string multiplied by worst-case expansion;
 453      * - end of regex;
 454      * - final 0.
 455      *
 456      * Calculate the worst case expansion by walking our struct subst_table.
 457      */
 458
 459     worst_case = strlen(table->question_mark);
 460
 461     if (worst_case < strlen(table->star))
 462         worst_case = strlen(table->star);
 463
 464     if (double_star && worst_case < strlen(table->double_star))
 465         worst_case = strlen(table->double_star);
 466
 467     result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1);
 468
 469     /*
 470      * Start by copying the beginning of the regex...
 471      */
 472
 473     dst = g_stpcpy(result, begin);
 474
 475     /*
 476      * ... Now to the meat of it.
 477      */
 478
 479     for (src = str; *src; src++) {
 480         c = *src;
 481
 482         /*
 483          * First, check that we're in a character class: each and every
 484          * character can be copied as is. We only need to be careful is the
 485          * character is a closing bracket: it will end the character class IF
 486          * AND ONLY IF it is not preceded by a backslash.
 487          */
 488
 489         if (in_character_class) {
 490             in_character_class = ((c != ']') || (*(src - 1) == '\\'));
 491             goto straight_copy;
 492         }
 493
 494         /*
 495          * Are we in a quote? If yes, it is really simple: copy the current
 496          * character, close the quote, the end.
 497          */
 498
 499         if (in_quote) {
 500             in_quote = FALSE;
 501             goto straight_copy;
 502         }
 503
 504         /*
 505          * The only thing left to handle now is the "normal" case: we are not in
 506          * a character class nor in a quote.
 507          */
 508
 509         if (c == '\\') {
 510             /*
 511              * Backslash: append it, and open a new quote.
 512              */
 513             in_quote = TRUE;
 514             goto straight_copy;
 515         } else if (c == '[') {
 516             /*
 517              * Opening bracket: the beginning of a character class.
 518              *
 519              * Look ahead the next character: if it's an exclamation mark, then
 520              * this is a complemented character class; append a caret to make
 521              * the result string regex-friendly, and forward one character in
 522              * advance.
 523              */
 524             *dst++ = c;
 525             in_character_class = TRUE;
 526             if (*(src + 1) == '!') {
 527                 *dst++ = '^';
 528                 src++;
 529             }
 530         } else if (IS_REGEX_META(c)) {
 531             /*
 532              * Regex metacharacter (except for ? and *, see below): append a
 533              * backslash, and then the character itself.
 534              */
 535             *dst++ = '\\';
 536             goto straight_copy;
 537         } else if (c == '?')
 538             /*
 539              * Question mark: take the subsitution string out of our subst_table
 540              * and append it to the string.
 541              */
 542             dst = g_stpcpy(dst, table->question_mark);
 543         else if (c == '*') {
 544             /*
 545              * Star: append the subsitution string found in our subst_table.
 546              * However, look forward the next character: if it's yet another
 547              * star, then see if there is a substitution string for the double
 548              * star and append this one instead.
 549              *
 550              * FIXME: this means that two consecutive stars in a glob string
 551              * where there is no substition for double_star can lead to
 552              * exponential regex execution time: consider [^/]*[^/]*.
 553              */
 554             const char *p = table->star;
 555             if (double_star && *(src + 1) == '*') {
 556                 src++;
 557                 p = table->double_star;
 558             }
 559             dst = g_stpcpy(dst, p);
 560         } else {
 561             /*
 562              * Any other character: append each time.
 563              */
 564 straight_copy:
 565             *dst++ = c;
 566         }
 567     }
 568
 569     /*
 570      * Done, now append the end, ONLY if we are not in a quote - a lone
 571      * backslash at the end of a glob is illegal, just leave it as it, it will
 572      * make the regex compile fail.
 573      */
 574
 575     if (!in_quote)
 576         dst = g_stpcpy(dst, end);
 577     /*
 578      * Finalize, return.
 579      */
 580
 581     *dst = '\0';
 582     return result;
 583 }
 584
 585 /*
 586  * File globs
 587  */
 588
 589 char *glob_to_regex(const char *glob)
 590 {
 591     return amglob_to_regex(glob, "^", "$", &glob_subst_stable);
 592 }
 593
 594 int match_glob(const char *glob, const char *str)
 595 {
 596     char *regex;
 597     regex_t *re;
 598     int result;
 599     regex_errbuf errmsg;
 600
 601     regex = glob_to_regex(glob);
 602     re = get_regex_from_cache(regex, &errmsg, TRUE);
 603
 604     if (!re)
 605         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 606         /*NOTREACHED*/
 607
 608     result = try_match(re, str, &errmsg);
 609
 610     if (result == MATCH_ERROR)
 611         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 612         /*NOTREACHED*/
 613
 614     g_free(regex);
 615
 616     return result;
 617 }
 618
 619 char *validate_glob(const char *glob)
 620 {
 621     char *regex, *ret = NULL;
 622     regex_t regc;
 623     static regex_errbuf errmsg;
 624
 625     regex = glob_to_regex(glob);
 626
 627     if (!do_regex_compile(regex, &regc, &errmsg, TRUE))
 628         ret = errmsg;
 629
 630     regfree(&regc);
 631     g_free(regex);
 632     return ret;
 633 }
 634
 635 /*
 636  * Tar globs
 637  */
 638
 639 static char *tar_to_regex(const char *glob)
 640 {
 641     return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable);
 642 }
 643
 644 int match_tar(const char *glob, const char *str)
 645 {
 646     char *regex;
 647     regex_t *re;
 648     int result;
 649     regex_errbuf errmsg;
 650
 651     regex = tar_to_regex(glob);
 652     re = get_regex_from_cache(regex, &errmsg, TRUE);
 653
 654     if (!re)
 655         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 656         /*NOTREACHED*/
 657
 658     result = try_match(re, str, &errmsg);
 659
 660     if (result == MATCH_ERROR)
 661         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 662         /*NOTREACHED*/
 663
 664     g_free(regex);
 665
 666     return result;
 667 }
 668
 669 /*
 670  * DISK/HOST MATCHING
 671  *
 672  * The functions below wrap input strings with separators and attempt to match
 673  * the result. The core of the operation is the match_word() function.
 674  */
 675
 676 /*
 677  * Check whether a glob passed as an argument to match_word() only looks for the
 678  * separator
 679  */
 680
 681 static gboolean glob_is_separator_only(const char *glob, char sep) {
 682     size_t len = strlen(glob);
 683     const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
 684         len3[] = { '^', sep, '$', 0 };
 685
 686     switch (len) {
 687         case 1:
 688             return (*glob == sep);
 689         case 2:
 690             return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2));
 691         case 3:
 692             return g_str_equal(glob, len3);
 693         default:
 694             return FALSE;
 695     }
 696 }
 697
 698 /*
 699  * Given a word and a separator as an argument, wrap the word with separators -
 700  * if need be. For instance, if '/' is the separator, the rules are:
 701  *
 702  * - "" -> "/"
 703  * - "/" -> "//"
 704  * - "//" -> left alone
 705  * - "xxx" -> "/xxx/"
 706  * - "/xxx" -> "/xxx/"
 707  * - "xxx/" -> "/xxx/"
 708  * - "/xxx/" -> left alone
 709  *
 710  * (note that xxx here may contain the separator as well)
 711  *
 712  * Note that the returned string is dynamically allocated: it is up to the
 713  * caller to free it. Note also that the first argument MUST NOT BE NULL.
 714  */
 715
 716 static char *wrap_word(const char *word, const char separator, const char *glob)
 717 {
 718     size_t len = strlen(word);
 719     size_t len_glob = strlen(glob);
 720     char *result, *p;
 721
 722     /*
 723      * We allocate for the worst case, which is two bytes more than the input
 724      * (have to prepend and append a separator).
 725      */
 726     result = g_malloc(len + 3);
 727     p = result;
 728
 729     /*
 730      * Zero-length: separator only
 731      */
 732
 733     if (len == 0) {
 734         *p++ = separator;
 735         goto out;
 736     }
 737
 738     /*
 739      * Length is one: if the only character is the separator only, the result
 740      * string is two separators
 741      */
 742
 743     if (len == 1 && word[0] == separator) {
 744         *p++ = separator;
 745         *p++ = separator;
 746         goto out;
 747     }
 748
 749     /*
 750      * Otherwise: prepend the separator if needed, append the separator if
 751      * needed.
 752      */
 753
 754     if (word[0] != separator && glob[0] != '^')
 755         *p++ = separator;
 756
 757     p = g_stpcpy(p, word);
 758
 759     if (word[len - 1] != separator && glob[len_glob-1] != '$')
 760         *p++ = separator;
 761
 762 out:
 763     *p++ = '\0';
 764     return result;
 765 }
 766
 767 static int match_word(const char *glob, const char *word, const char separator)
 768 {
 769     char *wrapped_word = wrap_word(word, separator, glob);
 770     struct mword_regexes *regexes = &mword_slash_regexes;
 771     struct subst_table *table = &mword_slash_subst_table;
 772     gboolean not_slash = (separator != '/');
 773     int ret;
 774
 775     /*
 776      * We only expect two separators: '/' or '.'. If it's not '/', it has to be
 777      * the other one...
 778      */
 779     if (not_slash) {
 780         regexes = &mword_dot_regexes;
 781         table = &mword_dot_subst_table;
 782     }
 783
 784     if(glob_is_separator_only(glob, separator)) {
 785         ret = do_match(regexes->re_double_sep, wrapped_word, TRUE);
 786         goto out;
 787     } else {
 788         /*
 789          * Unlike what happens for tar and disk expressions, we need to
 790          * calculate the beginning and end of our regex before calling
 791          * amglob_to_regex().
 792          */
 793
 794         const char *begin, *end;
 795         char *glob_copy = g_strdup(glob);
 796         char *p, *g = glob_copy;
 797         char *regex;
 798
 799         /*
 800          * Calculate the beginning of the regex:
 801          * - by default, it is an unanchored separator;
 802          * - if the glob begins with a caret, make that an anchored separator,
 803          *   and increment g appropriately;
 804          * - if it begins with a separator, make it the empty string.
 805          */
 806
 807         p = glob_copy;
 808         begin = regexes->re_separator;
 809
 810         if (*p == '^') {
 811             begin = "^";
 812             p++, g++;
 813             if (*p == separator) {
 814                 begin = regexes->re_begin_full;
 815                 g++;
 816             }
 817         } else if (*p == separator)
 818             begin = "";
 819
 820         /*
 821          * Calculate the end of the regex:
 822          * - an unanchored separator by default;
 823          * - if the last character is a backslash or the separator itself, it
 824          *   should be the empty string;
 825          * - if it is a dollar sign, overwrite it with 0 and look at the
 826          *   character before it: if it is the separator, only anchor at the
 827          *   end, otherwise, add a separator before the anchor.
 828          */
 829
 830         p = &(glob_copy[strlen(glob_copy) - 1]);
 831         end = regexes->re_separator;
 832         if (*p == '\\' || *p == separator) {
 833             end = "";
 834         } else if (*p == '$') {
 835             char prev = *(p - 1);
 836             *p = '\0';
 837             if (prev == separator) {
 838                 *(p-1) = '\0';
 839                 if (p-2 >= glob_copy) {
 840                     prev = *(p - 2);
 841                     if (prev == '\\') {
 842                         *(p-2) = '\0';
 843                     }
 844                 }
 845                 end = regexes->re_end_full;
 846             } else {
 847                 end = "$";
 848             }
 849         }
 850
 851         regex = amglob_to_regex(g, begin, end, table);
 852         ret = do_match(regex, wrapped_word, TRUE);
 853
 854         g_free(glob_copy);
 855         g_free(regex);
 856     }
 857
 858 out:
 859     g_free(wrapped_word);
 860     return ret;
 861 }
 862
 863 /*
 864  * Match a host expression
 865  */
 866
 867 int match_host(const char *glob, const char *host)
 868 {
 869     char *lglob, *lhost;
 870     int ret;
 871
 872     lglob = g_ascii_strdown(glob, -1);
 873     lhost = g_ascii_strdown(host, -1);
 874
 875     ret = match_word(lglob, lhost, '.');
 876
 877     g_free(lglob);
 878     g_free(lhost);
 879     return ret;
 880 }
 881
 882 /*
 883  * Match a disk expression. Not as straightforward, since Windows paths must be
 884  * accounted for.
 885  */
 886
 887 /*
 888  * Convert a disk and glob from Windows expressed paths (backslashes) into Unix
 889  * paths (slashes).
 890  *
 891  * Note: the resulting string is dynamically allocated, it is up to the caller
 892  * to free it.
 893  *
 894  * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
 895  */
 896
 897 static char *convert_unc_to_unix(const char *unc)
 898 {
 899     char *result = g_strdup(unc);
 900     return g_strdelimit(result, "\\", '/');
 901 }
 902
 903 static char *convert_winglob_to_unix(const char *glob)
 904 {
 905     const char *src;
 906     char *result, *dst;
 907     result = g_malloc(strlen(glob) + 1);
 908     dst = result;
 909
 910     for (src = glob; *src; src++) {
 911         if (*src == '\\' && *(src + 1) == '\\') {
 912             *dst++ = '/';
 913             src++;
 914             continue;
 915         }
 916         *dst++ = *src;
 917     }
 918     *dst = '\0';
 919     return result;
 920 }
 921
 922 /*
 923  * Match a disk expression
 924  */
 925
 926 int match_disk(const char *glob, const char *disk)
 927 {
 928     char *glob2 = NULL, *disk2 = NULL;
 929     const char *g = glob, *d = disk;
 930     int result;
 931
 932     /*
 933      * Check whether our disk potentially refers to a Windows share (the first
 934      * two characters are '\' and there is no / in the word at all): if yes,
 935      * build Unix paths instead and pass those as arguments to match_word()
 936      */
 937
 938     gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/'));
 939
 940     if (windows_share) {
 941         glob2 = convert_winglob_to_unix(glob);
 942         disk2 = convert_unc_to_unix(disk);
 943         g = (const char *) glob2;
 944         d = (const char *) disk2;
 945     }
 946
 947     result = match_word(g, d, '/');
 948
 949     /*
 950      * We can g_free(NULL), so this is "safe"
 951      */
 952     g_free(glob2);
 953     g_free(disk2);
 954
 955     return result;
 956 }
 957
 958 /*
 959  * TIMESTAMPS/LEVEL MATCHING
 960  */
 961
 962 static int
 963 alldigits(
 964     const char *str)
 965 {
 966     while (*str) {
 967         if (!isdigit((int)*(str++)))
 968             return 0;
 969     }
 970     return 1;
 971 }
 972
 973 int
 974 match_datestamp(
 975     const char *        dateexp,
 976     const char *        datestamp)
 977 {
 978     char *dash;
 979     size_t len, len_suffix;
 980     size_t len_prefix;
 981     char firstdate[100], lastdate[100];
 982     char mydateexp[100];
 983     int match_exact;
 984
 985     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 986         goto illegal;
 987     }
 988
 989     /* strip and ignore an initial "^" */
 990     if(dateexp[0] == '^') {
 991         strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
 992         mydateexp[sizeof(mydateexp)-1] = '\0';
 993     }
 994     else {
 995         strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
 996         mydateexp[sizeof(mydateexp)-1] = '\0';
 997     }
 998
 999     if(strlen(dateexp) < 1) {
1000         goto illegal;
1001     }
1002
1003     if(mydateexp[strlen(mydateexp)-1] == '$') {
1004         match_exact = 1;
1005         mydateexp[strlen(mydateexp)-1] = '\0';  /* strip the trailing $ */
1006     }
1007     else
1008         match_exact = 0;
1009
1010     /* a single dash represents a date range */
1011     if((dash = strchr(mydateexp,'-'))) {
1012         if(match_exact == 1 || strchr(dash+1, '-')) {
1013             goto illegal;
1014         }
1015
1016         /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
1017
1018         len = (size_t)(dash - mydateexp);   /* length of XXXYYYY */
1019         len_suffix = strlen(dash) - 1;  /* length of ZZZZ */
1020         if (len_suffix > len) goto illegal;
1021         if (len < len_suffix) {
1022             goto illegal;
1023         }
1024         len_prefix = len - len_suffix; /* length of XXX */
1025
1026         dash++;
1027
1028         strncpy(firstdate, mydateexp, len);
1029         firstdate[len] = '\0';
1030         strncpy(lastdate, mydateexp, len_prefix);
1031         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
1032         lastdate[len] = '\0';
1033         if (!alldigits(firstdate) || !alldigits(lastdate))
1034             goto illegal;
1035         if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
1036             goto illegal;
1037         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
1038                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
1039     }
1040     else {
1041         if (!alldigits(mydateexp))
1042             goto illegal;
1043         if(match_exact == 1) {
1044             return (g_str_equal(datestamp, mydateexp));
1045         }
1046         else {
1047             return (g_str_has_prefix(datestamp, mydateexp));
1048         }
1049     }
1050 illegal:
1051         error("Illegal datestamp expression %s", dateexp);
1052         /*NOTREACHED*/
1053 }
1054
1055
/*
 * Match a level against a level expression. The expression is either:
 *
 * - a string of digits: the level matches if it begins with these digits,
 *   or, if the expression ends with '$', if it is equal to them;
 * - a range LOW-HIGH: the level matches if its numerical value is within the
 *   range, bounds included. A range cannot end with '$'.
 *
 * A '^' at the beginning of the expression is ignored.
 *
 * Return non zero if the level matches, 0 otherwise. An illegal expression
 * (empty, 100 characters or more, not made of digits, number out of range) is
 * reported through error(), which is not expected to return.
 */
int
match_level(
    const char *        levelexp,
    const char *        level)
{
    const char *body;
    char *dash;
    long int low, hi, level_i;
    char mylevelexp[100];
    int match_exact;

    if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
        goto illegal;
    }

    /* strip and ignore an initial "^" */
    body = (levelexp[0] == '^') ? levelexp + 1 : levelexp;

    /*
     * The expression may have been a lone "^": nothing is left then, and
     * there is no last character to look at below.
     */
    if(body[0] == '\0') {
        goto illegal;
    }

    strncpy(mylevelexp, body, sizeof(mylevelexp)-1);
    mylevelexp[sizeof(mylevelexp)-1] = '\0';

    if(mylevelexp[strlen(mylevelexp)-1] == '$') {
        match_exact = 1;
        mylevelexp[strlen(mylevelexp)-1] = '\0';  /* strip the trailing $ */
    }
    else
        match_exact = 0;

    /* a dash represents a range of levels */
    if((dash = strchr(mylevelexp,'-'))) {
        if(match_exact == 1) {
            goto illegal;
        }

        /* split the expression in two: lower bound, upper bound */
        *dash = '\0';
        if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;

        errno = 0;
        low = strtol(mylevelexp, (char **) NULL, 10);
        if (errno) goto illegal;
        hi = strtol(dash+1, (char **) NULL, 10);
        if (errno) goto illegal;
        level_i = strtol(level, (char **) NULL, 10);
        if (errno) goto illegal;

        return ((level_i >= low) && (level_i <= hi));
    }
    else {
        if (!alldigits(mylevelexp)) goto illegal;
        if(match_exact == 1) {
            return (g_str_equal(level, mylevelexp));
        }
        else {
            return (g_str_has_prefix(level, mylevelexp));
        }
    }
illegal:
    error("Illegal level expression %s", levelexp);
    /*NOTREACHED*/
}