git.gag.com Git - debian/amanda/blob - common-src/match.c

   1 /*
   2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
   3  * Copyright (c) 1991-1998 University of Maryland at College Park
   4  * Copyright (c) 2007-2012 Zmanda, Inc.  All Rights Reserved.
   5  * All Rights Reserved.
   6  *
   7  * Permission to use, copy, modify, distribute, and sell this software and its
   8  * documentation for any purpose is hereby granted without fee, provided that
   9  * the above copyright notice appear in all copies and that both that
  10  * copyright notice and this permission notice appear in supporting
  11  * documentation, and that the name of U.M. not be used in advertising or
  12  * publicity pertaining to distribution of the software without specific,
  13  * written prior permission.  U.M. makes no representations about the
  14  * suitability of this software for any purpose.  It is provided "as is"
  15  * without express or implied warranty.
  16  *
  17  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  19  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  20  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  21  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  22  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  23  *
  24  * Authors: the Amanda Development Team.  Its members are listed in a
  25  * file named AUTHORS, in the root directory of this distribution.
  26  */
  27
  28 /*
  29  * See match.h for function prototypes and further explanations.
  30  */
  31
  32 #include "amanda.h"
  33 #include "match.h"
  34 #include <regex.h>
  35
  36 /*
  37  * DATA STRUCTURES, MACROS, STATIC DATA
  38  */
  39
  40 /*
  41  * Return codes used by try_match()
  42  */
  43
  44 #define MATCH_OK (1)
  45 #define MATCH_NONE (0)
  46 #define MATCH_ERROR (-1)
  47
  48 /*
  49  * Macro to tell whether a character is a regex metacharacter. Note that '*'
  50  * and '?' are NOT included: they are themselves special in globs.
  51  */
  52
  53 #define IS_REGEX_META(c) ( \
  54     (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
  55     (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
  56 )
  57
  58 /*
  59  * Define a specific type to hold error messages in case regex compile/matching
  60  * fails
  61  */
  62
  63 typedef char regex_errbuf[STR_SIZE];
  64
  65 /*
  66  * Structure used by amglob_to_regex() to expand particular glob characters. Its
  67  * fields are:
  68  * - question_mark: what the question mark ('?') should be replaced with;
  69  * - star: what the star ('*') should be replaced with;
  70  * - double_star: what two consecutive stars should be replaced with.
  71  *
  72  * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL.
  73  */
  74
  75 struct subst_table {
  76     const char *question_mark;
  77     const char *star;
  78     const char *double_star;
  79 };
  80
  81 /*
  82  * Susbtitution data for glob_to_regex()
  83  */
  84
  85 static struct subst_table glob_subst_stable = {
  86     "[^/]", /* question_mark */
  87     "[^/]*", /* star */
  88     NULL /* double_star */
  89 };
  90
  91 /*
  92  * Substitution data for tar_to_regex()
  93  */
  94
  95 static struct subst_table tar_subst_stable = {
  96     "[^/]", /* question_mark */
  97     ".*", /* star */
  98     NULL /* double_star */
  99 };
 100
 101 /*
 102  * Substitution data for match_word(): dot
 103  */
 104
 105 static struct subst_table mword_dot_subst_table = {
 106     "[^.]", /* question_mark */
 107     "[^.]*", /* star */
 108     ".*" /* double_star */
 109 };
 110
 111 /*
 112  * Substitution data for match_word(): slash
 113  */
 114
 115 static struct subst_table mword_slash_subst_table = {
 116     "[^/]", /* question_mark */
 117     "[^/]*", /* star */
 118     ".*" /* double_star */
 119 };
 120
 121 /*
 122  * match_word() specific data:
 123  * - re_double_sep: anchored regex matching two separators;
 124  * - re_separator: regex matching the separator;
 125  * - re_begin_full: regex matching the separator, anchored at the beginning;
 126  * - re_end_full: regex matching the separator, andchored at the end.
 127  */
 128
 129 struct mword_regexes {
 130     const char *re_double_sep;
 131     const char *re_begin_full;
 132     const char *re_separator;
 133     const char *re_end_full;
 134 };
 135
 136 static struct mword_regexes mword_dot_regexes = {
 137     "^\\.\\.$", /* re_double_sep */
 138     "^\\.", /* re_begin_full */
 139     "\\.", /* re_separator */
 140     "\\.$" /* re_end_full */
 141 };
 142
 143 static struct mword_regexes mword_slash_regexes = {
 144     "^\\/\\/$", /* re_double_sep */
 145     "^\\/", /* re_begin_full */
 146     "\\/", /* re_separator */
 147     "\\/$" /* re_end_full */
 148 };
 149
 150 /*
 151  * Regular expression caches, and a static mutex to protect initialization and
 152  * access. This may be unnecessarily coarse, but it is unknown at this time
 153  * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may
 154  * be called from within threads, so play it safe.
 155  */
 156
 157 #if (GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 31))
 158 # pragma GCC diagnostic push
 159 # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 160   static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
 161 # pragma GCC diagnostic pop
 162 #else
 163   static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
 164 #endif
 165 static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL;
 166
 167 /*
 168  * REGEX FUNCTIONS
 169  */
 170
 171 /*
 172  * Initialize regex caches. NOTE: this function MUST be called with
 173  * re_cache_mutex LOCKED, see get_regex_from_cache()
 174  */
 175
 176 static void init_regex_caches(void)
 177 {
 178     static gboolean initialized = FALSE;
 179
 180     if (initialized)
 181         return;
 182
 183     regex_cache = g_hash_table_new(g_str_hash, g_str_equal);
 184     regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal);
 185
 186     initialized = TRUE;
 187 }
 188
 189 /*
 190  * Cleanup a regular expression by escaping all non alphanumeric characters, and
 191  * append beginning/end anchors if need be
 192  */
 193
 194 char *clean_regex(const char *str, gboolean anchor)
 195 {
 196     const char *src;
 197     char *result, *dst;
 198
 199     result = g_malloc(2 * strlen(str) + 3);
 200     dst = result;
 201
 202     if (anchor)
 203         *dst++ = '^';
 204
 205     for (src = str; *src; src++) {
 206         if (!g_ascii_isalnum((int) *src))
 207             *dst++ = '\\';
 208         *dst++ = *src;
 209     }
 210
 211     if (anchor)
 212         *dst++ = '$';
 213
 214     *dst = '\0';
 215     return result;
 216 }
 217
 218 /*
 219  * Compile one regular expression. Return TRUE if the regex has been compiled
 220  * successfully. Otherwise, return FALSE and copy the error message into the
 221  * supplied regex_errbuf pointer. Also, we want to know whether flags should
 222  * include REG_NEWLINE (See regcomp(3) for details). Since this is the more
 223  * frequent case, add REG_NEWLINE to the default flags, and remove it only if
 224  * match_newline is set to FALSE.
 225  */
 226
 227 static gboolean do_regex_compile(const char *str, regex_t *regex,
 228     regex_errbuf *errbuf, gboolean match_newline)
 229 {
 230     int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
 231     int result;
 232
 233     if (!match_newline)
 234         flags &= ~REG_NEWLINE;
 235
 236     result = regcomp(regex, str, flags);
 237
 238     if (!result)
 239         return TRUE;
 240
 241     regerror(result, regex, *errbuf, sizeof(*errbuf));
 242     return FALSE;
 243 }
 244
 245 /*
 246  * Get an already compiled buffer from the regex cache. If the regex is not in
 247  * the cache, allocate a new one and compile it using do_regex_compile(). If the
 248  * compile fails, call regfree() on the object and return NULL to the caller. If
 249  * it does succeed, put the regex buffer in cache and return a pointer to it.
 250  */
 251
 252 static regex_t *get_regex_from_cache(const char *re_str, regex_errbuf *errbuf,
 253     gboolean match_newline)
 254 {
 255     regex_t *ret;
 256     GHashTable *cache;
 257
 258     g_static_mutex_lock(&re_cache_mutex);
 259
 260     init_regex_caches();
 261
 262     cache = (match_newline) ? regex_cache_newline: regex_cache;
 263     ret = g_hash_table_lookup(cache, re_str);
 264
 265     if (ret)
 266         goto out;
 267
 268     ret = g_new(regex_t, 1);
 269
 270     if (do_regex_compile(re_str, ret, errbuf, match_newline)) {
 271         g_hash_table_insert(cache, g_strdup(re_str), ret);
 272         goto out;
 273     }
 274
 275     regfree(ret);
 276     g_free(ret);
 277     ret = NULL;
 278
 279 out:
 280     g_static_mutex_unlock(&re_cache_mutex);
 281     return ret;
 282 }
 283
 284 /*
 285  * Validate one regular expression using do_regex_compile(), and return NULL if
 286  * the regex is valid, or the error message otherwise.
 287  */
 288
 289 char *validate_regexp(const char *regex)
 290 {
 291     regex_t regc;
 292     static regex_errbuf errmsg;
 293     gboolean valid;
 294
 295     valid = do_regex_compile(regex, &regc, &errmsg, TRUE);
 296
 297     regfree(&regc);
 298     return (valid) ? NULL : errmsg;
 299 }
 300
 301 /*
 302  * See if a string matches a compiled regular expression. Return one of MATCH_*
 303  * defined above. If, for some reason, regexec() returns something other than
 304  * not 0 or REG_NOMATCH, return MATCH_ERROR and print the error message in the
 305  * supplied regex_errbuf.
 306  */
 307
 308 static int try_match(regex_t *regex, const char *str,
 309     regex_errbuf *errbuf)
 310 {
 311     int result = regexec(regex, str, 0, 0, 0);
 312
 313     switch(result) {
 314         case 0:
 315             return MATCH_OK;
 316         case REG_NOMATCH:
 317             return MATCH_NONE;
 318         /* Fall through: something went really wrong */
 319     }
 320
 321     regerror(result, regex, *errbuf, sizeof(*errbuf));
 322     return MATCH_ERROR;
 323 }
 324
 325 /*
 326  * Try and match a string against a regular expression, using
 327  * do_regex_compile() and try_match(). Exit early if the regex didn't compile
 328  * or there was an error during matching.
 329  */
 330
 331 int do_match(const char *regex, const char *str, gboolean match_newline)
 332 {
 333     regex_t *re;
 334     int result;
 335     regex_errbuf errmsg;
 336
 337     re = get_regex_from_cache(regex, &errmsg, match_newline);
 338
 339     if (!re)
 340         error("regex \"%s\": %s", regex, errmsg);
 341         /*NOTREACHED*/
 342
 343     result = try_match(re, str, &errmsg);
 344
 345     if (result == MATCH_ERROR)
 346         error("regex \"%s\": %s", regex, errmsg);
 347         /*NOTREACHED*/
 348
 349     return result;
 350 }
 351
 352 /*
 353  * DISK/HOST EXPRESSION HANDLING
 354  */
 355
 356 /*
 357  * Check whether a given character should be escaped (that is, prepended with a
 358  * backslash), EXCEPT for one character.
 359  */
 360
 361 static gboolean should_be_escaped_except(char c, char not_this_one)
 362 {
 363     if (c == not_this_one)
 364         return FALSE;
 365
 366     switch (c) {
 367         case '\\':
 368         case '^':
 369         case '$':
 370         case '?':
 371         case '*':
 372         case '[':
 373         case ']':
 374         case '.':
 375         case '/':
 376             return TRUE;
 377     }
 378
 379     return FALSE;
 380 }
 381
 382 /*
 383  * Take a disk/host expression and turn it into a full-blown amglob (with
 384  * start and end anchors) following rules in amanda-match(7). The not_this_one
 385  * argument represents a character which is NOT meant to be special in this
 386  * case: '/' for disks and '.' for hosts.
 387  */
 388
 389 static char *full_amglob_from_expression(const char *str, char not_this_one)
 390 {
 391     const char *src;
 392     char *result, *dst;
 393
 394     result = g_malloc(2 * strlen(str) + 3);
 395     dst = result;
 396
 397     *dst++ = '^';
 398
 399     for (src = str; *src; src++) {
 400         if (should_be_escaped_except(*src, not_this_one))
 401             *dst++ = '\\';
 402         *dst++ = *src;
 403     }
 404
 405     *dst++ = '$';
 406     *dst = '\0';
 407     return result;
 408 }
 409
 410 /*
 411  * Turn a disk/host expression into a regex
 412  */
 413
 414 char *make_exact_disk_expression(const char *disk)
 415 {
 416     return full_amglob_from_expression(disk, '/');
 417 }
 418
 419 char *make_exact_host_expression(const char *host)
 420 {
 421     return full_amglob_from_expression(host, '.');
 422 }
 423
 424 /*
 425  * GLOB HANDLING, as per amanda-match(7)
 426  */
 427
 428 /*
 429  * Turn a glob into a regex.
 430  */
 431
 432 static char *amglob_to_regex(const char *str, const char *begin,
 433     const char *end, struct subst_table *table)
 434 {
 435     const char *src;
 436     char *result, *dst;
 437     char c;
 438     size_t worst_case;
 439     gboolean double_star = (table->double_star != NULL);
 440
 441     /*
 442      * There are two particular cases when building a regex out of a glob:
 443      * character classes (anything inside [...] or [!...] and quotes (anything
 444      * preceded by a backslash). We start with none being true.
 445      */
 446
 447     gboolean in_character_class = FALSE, in_quote = FALSE;
 448
 449     /*
 450      * Allocate enough space for our string. At worst, the allocated space is
 451      * the length of the following:
 452      * - beginning of regex;
 453      * - size of original string multiplied by worst-case expansion;
 454      * - end of regex;
 455      * - final 0.
 456      *
 457      * Calculate the worst case expansion by walking our struct subst_table.
 458      */
 459
 460     worst_case = strlen(table->question_mark);
 461
 462     if (worst_case < strlen(table->star))
 463         worst_case = strlen(table->star);
 464
 465     if (double_star && worst_case < strlen(table->double_star))
 466         worst_case = strlen(table->double_star);
 467
 468     result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1);
 469
 470     /*
 471      * Start by copying the beginning of the regex...
 472      */
 473
 474     dst = g_stpcpy(result, begin);
 475
 476     /*
 477      * ... Now to the meat of it.
 478      */
 479
 480     for (src = str; *src; src++) {
 481         c = *src;
 482
 483         /*
 484          * First, check that we're in a character class: each and every
 485          * character can be copied as is. We only need to be careful is the
 486          * character is a closing bracket: it will end the character class IF
 487          * AND ONLY IF it is not preceded by a backslash.
 488          */
 489
 490         if (in_character_class) {
 491             in_character_class = ((c != ']') || (*(src - 1) == '\\'));
 492             goto straight_copy;
 493         }
 494
 495         /*
 496          * Are we in a quote? If yes, it is really simple: copy the current
 497          * character, close the quote, the end.
 498          */
 499
 500         if (in_quote) {
 501             in_quote = FALSE;
 502             goto straight_copy;
 503         }
 504
 505         /*
 506          * The only thing left to handle now is the "normal" case: we are not in
 507          * a character class nor in a quote.
 508          */
 509
 510         if (c == '\\') {
 511             /*
 512              * Backslash: append it, and open a new quote.
 513              */
 514             in_quote = TRUE;
 515             goto straight_copy;
 516         } else if (c == '[') {
 517             /*
 518              * Opening bracket: the beginning of a character class.
 519              *
 520              * Look ahead the next character: if it's an exclamation mark, then
 521              * this is a complemented character class; append a caret to make
 522              * the result string regex-friendly, and forward one character in
 523              * advance.
 524              */
 525             *dst++ = c;
 526             in_character_class = TRUE;
 527             if (*(src + 1) == '!') {
 528                 *dst++ = '^';
 529                 src++;
 530             }
 531         } else if (IS_REGEX_META(c)) {
 532             /*
 533              * Regex metacharacter (except for ? and *, see below): append a
 534              * backslash, and then the character itself.
 535              */
 536             *dst++ = '\\';
 537             goto straight_copy;
 538         } else if (c == '?')
 539             /*
 540              * Question mark: take the subsitution string out of our subst_table
 541              * and append it to the string.
 542              */
 543             dst = g_stpcpy(dst, table->question_mark);
 544         else if (c == '*') {
 545             /*
 546              * Star: append the subsitution string found in our subst_table.
 547              * However, look forward the next character: if it's yet another
 548              * star, then see if there is a substitution string for the double
 549              * star and append this one instead.
 550              *
 551              * FIXME: this means that two consecutive stars in a glob string
 552              * where there is no substition for double_star can lead to
 553              * exponential regex execution time: consider [^/]*[^/]*.
 554              */
 555             const char *p = table->star;
 556             if (double_star && *(src + 1) == '*') {
 557                 src++;
 558                 p = table->double_star;
 559             }
 560             dst = g_stpcpy(dst, p);
 561         } else {
 562             /*
 563              * Any other character: append each time.
 564              */
 565 straight_copy:
 566             *dst++ = c;
 567         }
 568     }
 569
 570     /*
 571      * Done, now append the end, ONLY if we are not in a quote - a lone
 572      * backslash at the end of a glob is illegal, just leave it as it, it will
 573      * make the regex compile fail.
 574      */
 575
 576     if (!in_quote)
 577         dst = g_stpcpy(dst, end);
 578     /*
 579      * Finalize, return.
 580      */
 581
 582     *dst = '\0';
 583     return result;
 584 }
 585
 586 /*
 587  * File globs
 588  */
 589
 590 char *glob_to_regex(const char *glob)
 591 {
 592     return amglob_to_regex(glob, "^", "$", &glob_subst_stable);
 593 }
 594
 595 int match_glob(const char *glob, const char *str)
 596 {
 597     char *regex;
 598     regex_t *re;
 599     int result;
 600     regex_errbuf errmsg;
 601
 602     regex = glob_to_regex(glob);
 603     re = get_regex_from_cache(regex, &errmsg, TRUE);
 604
 605     if (!re)
 606         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 607         /*NOTREACHED*/
 608
 609     result = try_match(re, str, &errmsg);
 610
 611     if (result == MATCH_ERROR)
 612         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 613         /*NOTREACHED*/
 614
 615     g_free(regex);
 616
 617     return result;
 618 }
 619
 620 char *validate_glob(const char *glob)
 621 {
 622     char *regex, *ret = NULL;
 623     regex_t regc;
 624     static regex_errbuf errmsg;
 625
 626     regex = glob_to_regex(glob);
 627
 628     if (!do_regex_compile(regex, &regc, &errmsg, TRUE))
 629         ret = errmsg;
 630
 631     regfree(&regc);
 632     g_free(regex);
 633     return ret;
 634 }
 635
 636 /*
 637  * Tar globs
 638  */
 639
 640 static char *tar_to_regex(const char *glob)
 641 {
 642     return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable);
 643 }
 644
 645 int match_tar(const char *glob, const char *str)
 646 {
 647     char *regex;
 648     regex_t *re;
 649     int result;
 650     regex_errbuf errmsg;
 651
 652     regex = tar_to_regex(glob);
 653     re = get_regex_from_cache(regex, &errmsg, TRUE);
 654
 655     if (!re)
 656         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 657         /*NOTREACHED*/
 658
 659     result = try_match(re, str, &errmsg);
 660
 661     if (result == MATCH_ERROR)
 662         error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
 663         /*NOTREACHED*/
 664
 665     g_free(regex);
 666
 667     return result;
 668 }
 669
 670 /*
 671  * DISK/HOST MATCHING
 672  *
 673  * The functions below wrap input strings with separators and attempt to match
 674  * the result. The core of the operation is the match_word() function.
 675  */
 676
 677 /*
 678  * Check whether a glob passed as an argument to match_word() only looks for the
 679  * separator
 680  */
 681
 682 static gboolean glob_is_separator_only(const char *glob, char sep) {
 683     size_t len = strlen(glob);
 684     const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
 685         len3[] = { '^', sep, '$', 0 };
 686
 687     switch (len) {
 688         case 1:
 689             return (*glob == sep);
 690         case 2:
 691             return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2));
 692         case 3:
 693             return g_str_equal(glob, len3);
 694         default:
 695             return FALSE;
 696     }
 697 }
 698
 699 /*
 700  * Given a word and a separator as an argument, wrap the word with separators -
 701  * if need be. For instance, if '/' is the separator, the rules are:
 702  *
 703  * - "" -> "/"
 704  * - "/" -> "//"
 705  * - "//" -> left alone
 706  * - "xxx" -> "/xxx/"
 707  * - "/xxx" -> "/xxx/"
 708  * - "xxx/" -> "/xxx/"
 709  * - "/xxx/" -> left alone
 710  *
 711  * (note that xxx here may contain the separator as well)
 712  *
 713  * Note that the returned string is dynamically allocated: it is up to the
 714  * caller to free it. Note also that the first argument MUST NOT BE NULL.
 715  */
 716
 717 static char *wrap_word(const char *word, const char separator, const char *glob)
 718 {
 719     size_t len = strlen(word);
 720     size_t len_glob = strlen(glob);
 721     char *result, *p;
 722
 723     /*
 724      * We allocate for the worst case, which is two bytes more than the input
 725      * (have to prepend and append a separator).
 726      */
 727     result = g_malloc(len + 3);
 728     p = result;
 729
 730     /*
 731      * Zero-length: separator only
 732      */
 733
 734     if (len == 0) {
 735         *p++ = separator;
 736         goto out;
 737     }
 738
 739     /*
 740      * Length is one: if the only character is the separator only, the result
 741      * string is two separators
 742      */
 743
 744     if (len == 1 && word[0] == separator) {
 745         *p++ = separator;
 746         *p++ = separator;
 747         goto out;
 748     }
 749
 750     /*
 751      * Otherwise: prepend the separator if needed, append the separator if
 752      * needed.
 753      */
 754
 755     if (word[0] != separator && glob[0] != '^')
 756         *p++ = separator;
 757
 758     p = g_stpcpy(p, word);
 759
 760     if (word[len - 1] != separator && glob[len_glob-1] != '$')
 761         *p++ = separator;
 762
 763 out:
 764     *p++ = '\0';
 765     return result;
 766 }
 767
 768 static int match_word(const char *glob, const char *word, const char separator)
 769 {
 770     char *wrapped_word = wrap_word(word, separator, glob);
 771     struct mword_regexes *regexes = &mword_slash_regexes;
 772     struct subst_table *table = &mword_slash_subst_table;
 773     gboolean not_slash = (separator != '/');
 774     int ret;
 775
 776     /*
 777      * We only expect two separators: '/' or '.'. If it's not '/', it has to be
 778      * the other one...
 779      */
 780     if (not_slash) {
 781         regexes = &mword_dot_regexes;
 782         table = &mword_dot_subst_table;
 783     }
 784
 785     if(glob_is_separator_only(glob, separator)) {
 786         ret = do_match(regexes->re_double_sep, wrapped_word, TRUE);
 787         goto out;
 788     } else {
 789         /*
 790          * Unlike what happens for tar and disk expressions, we need to
 791          * calculate the beginning and end of our regex before calling
 792          * amglob_to_regex().
 793          */
 794
 795         const char *begin, *end;
 796         char *glob_copy = g_strdup(glob);
 797         char *p, *g = glob_copy;
 798         char *regex;
 799
 800         /*
 801          * Calculate the beginning of the regex:
 802          * - by default, it is an unanchored separator;
 803          * - if the glob begins with a caret, make that an anchored separator,
 804          *   and increment g appropriately;
 805          * - if it begins with a separator, make it the empty string.
 806          */
 807
 808         p = glob_copy;
 809         begin = regexes->re_separator;
 810
 811         if (*p == '^') {
 812             begin = "^";
 813             p++, g++;
 814             if (*p == separator) {
 815                 begin = regexes->re_begin_full;
 816                 g++;
 817             }
 818         } else if (*p == separator)
 819             begin = "";
 820
 821         /*
 822          * Calculate the end of the regex:
 823          * - an unanchored separator by default;
 824          * - if the last character is a backslash or the separator itself, it
 825          *   should be the empty string;
 826          * - if it is a dollar sign, overwrite it with 0 and look at the
 827          *   character before it: if it is the separator, only anchor at the
 828          *   end, otherwise, add a separator before the anchor.
 829          */
 830
 831         p = &(glob_copy[strlen(glob_copy) - 1]);
 832         end = regexes->re_separator;
 833         if (*p == '\\' || *p == separator) {
 834             end = "";
 835         } else if (*p == '$') {
 836             char prev = *(p - 1);
 837             *p = '\0';
 838             if (prev == separator) {
 839                 *(p-1) = '\0';
 840                 if (p-2 >= glob_copy) {
 841                     prev = *(p - 2);
 842                     if (prev == '\\') {
 843                         *(p-2) = '\0';
 844                     }
 845                 }
 846                 end = regexes->re_end_full;
 847             } else {
 848                 end = "$";
 849             }
 850         }
 851
 852         regex = amglob_to_regex(g, begin, end, table);
 853         ret = do_match(regex, wrapped_word, TRUE);
 854
 855         g_free(glob_copy);
 856         g_free(regex);
 857     }
 858
 859 out:
 860     g_free(wrapped_word);
 861     return ret;
 862 }
 863
 864 /*
 865  * Match a host expression
 866  */
 867
 868 int match_host(const char *glob, const char *host)
 869 {
 870     char *lglob, *lhost;
 871     int ret;
 872
 873     if (*glob == '=') {
 874         return strcmp(glob+1, host) == 0;
 875     }
 876     lglob = g_ascii_strdown(glob, -1);
 877     lhost = g_ascii_strdown(host, -1);
 878
 879     ret = match_word(lglob, lhost, '.');
 880
 881     g_free(lglob);
 882     g_free(lhost);
 883     return ret;
 884 }
 885
 886 /*
 887  * Match a disk expression. Not as straightforward, since Windows paths must be
 888  * accounted for.
 889  */
 890
 891 /*
 892  * Convert a disk and glob from Windows expressed paths (backslashes) into Unix
 893  * paths (slashes).
 894  *
 895  * Note: the resulting string is dynamically allocated, it is up to the caller
 896  * to free it.
 897  *
 898  * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
 899  */
 900
 901 static char *convert_unc_to_unix(const char *unc)
 902 {
 903     char *result = g_strdup(unc);
 904     return g_strdelimit(result, "\\", '/');
 905 }
 906
 907 static char *convert_winglob_to_unix(const char *glob)
 908 {
 909     const char *src;
 910     char *result, *dst;
 911     result = g_malloc(strlen(glob) + 1);
 912     dst = result;
 913
 914     for (src = glob; *src; src++) {
 915         if (*src == '\\' && *(src + 1) == '\\') {
 916             *dst++ = '/';
 917             src++;
 918             continue;
 919         }
 920         *dst++ = *src;
 921     }
 922     *dst = '\0';
 923     return result;
 924 }
 925
 926 /*
 927  * Match a disk expression
 928  */
 929
 930 int match_disk(const char *glob, const char *disk)
 931 {
 932     char *glob2 = NULL, *disk2 = NULL;
 933     const char *g = glob, *d = disk;
 934     int result;
 935
 936     /*
 937      * Check whether our disk potentially refers to a Windows share (the first
 938      * two characters are '\' and there is no / in the word at all): if yes,
 939      * build Unix paths instead and pass those as arguments to match_word()
 940      */
 941
 942     gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/'));
 943
 944     if (*glob == '=') {
 945         return strcmp(glob+1, disk) == 0;
 946     }
 947
 948     if (windows_share) {
 949         glob2 = convert_winglob_to_unix(glob);
 950         disk2 = convert_unc_to_unix(disk);
 951         g = (const char *) glob2;
 952         d = (const char *) disk2;
 953     }
 954
 955     result = match_word(g, d, '/');
 956
 957     /*
 958      * We can g_free(NULL), so this is "safe"
 959      */
 960     g_free(glob2);
 961     g_free(disk2);
 962
 963     return result;
 964 }
 965
 966 /*
 967  * TIMESTAMPS/LEVEL MATCHING
 968  */
 969
 970 static int
 971 alldigits(
 972     const char *str)
 973 {
 974     while (*str) {
 975         if (!isdigit((int)*(str++)))
 976             return 0;
 977     }
 978     return 1;
 979 }
 980
 981 int
 982 match_datestamp(
 983     const char *        dateexp,
 984     const char *        datestamp)
 985 {
 986     char *dash;
 987     size_t len, len_suffix;
 988     size_t len_prefix;
 989     char firstdate[100], lastdate[100];
 990     char mydateexp[100];
 991     int match_exact;
 992
 993     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 994         goto illegal;
 995     }
 996
 997     if (*dateexp == '=') {
 998         return strcmp(dateexp+1, datestamp) == 0;
 999     }
1000
1001     /* strip and ignore an initial "^" */
1002     if(dateexp[0] == '^') {
1003         strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
1004         mydateexp[sizeof(mydateexp)-1] = '\0';
1005     }
1006     else {
1007         strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
1008         mydateexp[sizeof(mydateexp)-1] = '\0';
1009     }
1010
1011     if(strlen(dateexp) < 1) {
1012         goto illegal;
1013     }
1014
1015     if(mydateexp[strlen(mydateexp)-1] == '$') {
1016         match_exact = 1;
1017         mydateexp[strlen(mydateexp)-1] = '\0';  /* strip the trailing $ */
1018     }
1019     else
1020         match_exact = 0;
1021
1022     /* a single dash represents a date range */
1023     if((dash = strchr(mydateexp,'-'))) {
1024         if(match_exact == 1 || strchr(dash+1, '-')) {
1025             goto illegal;
1026         }
1027
1028         /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
1029
1030         len = (size_t)(dash - mydateexp);   /* length of XXXYYYY */
1031         len_suffix = strlen(dash) - 1;  /* length of ZZZZ */
1032         if (len_suffix > len) goto illegal;
1033         if (len < len_suffix) {
1034             goto illegal;
1035         }
1036         len_prefix = len - len_suffix; /* length of XXX */
1037
1038         dash++;
1039
1040         strncpy(firstdate, mydateexp, len);
1041         firstdate[len] = '\0';
1042         strncpy(lastdate, mydateexp, len_prefix);
1043         strncpy(&(lastdate[len_prefix]), dash, len_suffix);
1044         lastdate[len] = '\0';
1045         if (!alldigits(firstdate) || !alldigits(lastdate))
1046             goto illegal;
1047         if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
1048             goto illegal;
1049         return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
1050                 (strncmp(datestamp, lastdate , strlen(lastdate))  <= 0));
1051     }
1052     else {
1053         if (!alldigits(mydateexp))
1054             goto illegal;
1055         if(match_exact == 1) {
1056             return (g_str_equal(datestamp, mydateexp));
1057         }
1058         else {
1059             return (g_str_has_prefix(datestamp, mydateexp));
1060         }
1061     }
1062 illegal:
1063         error("Illegal datestamp expression %s", dateexp);
1064         /*NOTREACHED*/
1065 }
1066
1067
/*
 * Match a level against a level expression. The expression is one of:
 * - "=STR": the level must be strictly equal to STR;
 * - "DIGITS": the level must start with DIGITS;
 * - "DIGITS$": the level must be equal to DIGITS;
 * - "LOW-HI": range, the level (converted with strtol()) must lie between LOW
 *   and HI, both included.
 * A leading '^' is stripped and ignored. Expressions which are empty (also
 * after stripping the '^'), 100 characters or longer, or otherwise malformed
 * are reported through error().
 *
 * Return non-zero if the level matches, 0 otherwise.
 */
int
match_level(
    const char *        levelexp,
    const char *        level)
{
    char *dash;
    long int low, hi, level_i;
    char mylevelexp[100];
    int match_exact;

    if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
        error("Illegal level expression %s", levelexp);
        /*NOTREACHED*/
    }

    if (*levelexp == '=') {
        return strcmp(levelexp+1, level) == 0;
    }

    /* strip and ignore an initial "^"; the length was checked above */
    if(levelexp[0] == '^') {
        strncpy(mylevelexp, levelexp+1, sizeof(mylevelexp)-1);
        mylevelexp[sizeof(mylevelexp)-1] = '\0';
    }
    else {
        strncpy(mylevelexp, levelexp, sizeof(mylevelexp)-1);
        mylevelexp[sizeof(mylevelexp)-1] = '\0';
    }

    /*
     * A lone "^" leaves nothing behind: reject it here, the code below looks
     * at the last character of mylevelexp.
     */
    if (strlen(mylevelexp) == 0) {
        error("Illegal level expression %s", levelexp);
        /*NOTREACHED*/
    }

    if(mylevelexp[strlen(mylevelexp)-1] == '$') {
        match_exact = 1;
        mylevelexp[strlen(mylevelexp)-1] = '\0';  /* strip the trailing $ */
    }
    else
        match_exact = 0;

    /* a dash represents a level range */
    if((dash = strchr(mylevelexp,'-'))) {
        if(match_exact == 1) {
            goto illegal;
        }

        /* split into LOW and HI */
        *dash = '\0';
        if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;

        errno = 0;
        low = strtol(mylevelexp, (char **) NULL, 10);
        if (errno) goto illegal;
        hi = strtol(dash+1, (char **) NULL, 10);
        if (errno) goto illegal;
        level_i = strtol(level, (char **) NULL, 10);
        if (errno) goto illegal;

        return ((level_i >= low) && (level_i <= hi));
    }
    else {
        if (!alldigits(mylevelexp)) goto illegal;
        if(match_exact == 1) {
            return (g_str_equal(level, mylevelexp));
        }
        else {
            return (g_str_has_prefix(level, mylevelexp));
        }
    }
illegal:
    error("Illegal level expression %s", levelexp);
    /*NOTREACHED*/
}