X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=common-src%2Fmatch.c;h=328097c4334a9e1604fa02aa3b183f45a1c114f5;hb=691567b16c13087b31ee4c2b6d038e57872fae82;hp=d8c83bcc7657ec16014ade4c0f35587bc4e20826;hpb=afaa71b3866b46b082b6c895772e15b36d8865fe;p=debian%2Famanda

diff --git a/common-src/match.c b/common-src/match.c
index d8c83bc..328097c 100644
--- a/common-src/match.c
+++ b/common-src/match.c
@@ -1,6 +1,7 @@
 /*
  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
  * Copyright (c) 1991-1998 University of Maryland at College Park
+ * Copyright (c) 2007-2012 Zmanda, Inc.  All Rights Reserved.
  * All Rights Reserved.
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
@@ -23,541 +24,949 @@
  * Authors: the Amanda Development Team.  Its members are listed in a
  * file named AUTHORS, in the root directory of this distribution.
  */
+
 /*
- * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $
- *
- * functions for checking and matching regular expressions
+ * See match.h for function prototypes and further explanations.
  */
 
 #include "amanda.h"
+#include "match.h"
 #include <regex.h>
 
-static int match_word(const char *glob, const char *word, const char separator);
+/*
+ * DATA STRUCTURES, MACROS, STATIC DATA
+ */
+
+/*
+ * Return codes used by try_match()
+ */
+
+#define MATCH_OK (1)
+#define MATCH_NONE (0)
+#define MATCH_ERROR (-1)
+
+/*
+ * Macro to tell whether a character is a regex metacharacter. Note that '*'
+ * and '?' are NOT included: they are themselves special in globs.
+ */
+
+#define IS_REGEX_META(c) ( \
+    (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
+    (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
+)
+
+/*
+ * Define a specific type to hold error messages in case regex compile/matching
+ * fails
+ */
+
+typedef char regex_errbuf[STR_SIZE];
+
+/*
+ * Structure used by amglob_to_regex() to expand particular glob characters. Its
+ * fields are:
+ * - question_mark: what the question mark ('?') should be replaced with;
+ * - star: what the star ('*') should be replaced with;
+ * - double_star: what two consecutive stars should be replaced with.
+ *
+ * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL.
+ */
+
+struct subst_table {
+    const char *question_mark;
+    const char *star;
+    const char *double_star;
+};
+
+/*
+ * Substitution data for glob_to_regex()
+ */
+
+static struct subst_table glob_subst_stable = {
+    "[^/]", /* question_mark */
+    "[^/]*", /* star */
+    NULL /* double_star */
+};
+
+/*
+ * Substitution data for tar_to_regex()
+ */
+
+static struct subst_table tar_subst_stable = {
+    "[^/]", /* question_mark */
+    ".*", /* star */
+    NULL /* double_star */
+};
+
+/*
+ * Substitution data for match_word(): dot
+ */
+
+static struct subst_table mword_dot_subst_table = {
+    "[^.]", /* question_mark */
+    "[^.]*", /* star */
+    ".*" /* double_star */
+};
+
+/*
+ * Substitution data for match_word(): slash
+ */
+
+static struct subst_table mword_slash_subst_table = {
+    "[^/]", /* question_mark */
+    "[^/]*", /* star */
+    ".*" /* double_star */
+};
+
+/*
+ * match_word() specific data:
+ * - re_double_sep: anchored regex matching two separators;
+ * - re_separator: regex matching the separator;
+ * - re_begin_full: regex matching the separator, anchored at the beginning;
+ * - re_end_full: regex matching the separator, anchored at the end.
+ */
+
+struct mword_regexes {
+    const char *re_double_sep;
+    const char *re_begin_full;
+    const char *re_separator;
+    const char *re_end_full;
+};
+
+static struct mword_regexes mword_dot_regexes = {
+    "^\\.\\.$", /* re_double_sep */
+    "^\\.", /* re_begin_full */
+    "\\.", /* re_separator */
+    "\\.$" /* re_end_full */
+};
+
+static struct mword_regexes mword_slash_regexes = {
+    "^\\/\\/$", /* re_double_sep */
+    "^\\/", /* re_begin_full */
+    "\\/", /* re_separator */
+    "\\/$" /* re_end_full */
+};
+
+/*
+ * Regular expression caches, and a static mutex to protect initialization and
+ * access. This may be unnecessarily coarse, but it is unknown at this time
+ * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may
+ * be called from within threads, so play it safe.
+ */
+
+#if (GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 31))
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+  static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
+# pragma GCC diagnostic pop
+#else
+  static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
+#endif
+static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL;
+
+/*
+ * REGEX FUNCTIONS
+ */
 
-char *
-validate_regexp(
-    const char *	regex)
+/*
+ * Initialize regex caches. NOTE: this function MUST be called with
+ * re_cache_mutex LOCKED, see get_regex_from_cache()
+ */
+
+static void init_regex_caches(void)
 {
-    regex_t regc;
-    int result;
-    static char errmsg[STR_SIZE];
+    static gboolean initialized = FALSE;
 
-    if ((result = regcomp(&regc, regex,
-			  REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
-      regerror(result, &regc, errmsg, SIZEOF(errmsg));
-      return errmsg;
-    }
+    if (initialized)
+        return;
 
-    regfree(&regc);
+    regex_cache = g_hash_table_new(g_str_hash, g_str_equal);
+    regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal);
 
-    return NULL;
+    initialized = TRUE;
 }
 
-char *
-clean_regex(
-    const char *	regex)
+/*
+ * Cleanup a regular expression by escaping all non alphanumeric characters, and
+ * add beginning ('^') and end ('$') anchors if anchor is TRUE
+ */
+
+char *clean_regex(const char *str, gboolean anchor)
 {
-    char *result;
-    int j;
-    size_t i;
-    result = alloc(2*strlen(regex)+1);
-
-    for(i=0,j=0;i<strlen(regex);i++) {
-	if(!isalnum((int)regex[i]))
-	    result[j++]='\\';
-	result[j++]=regex[i];
+    const char *src;
+    char *result, *dst;
+
+    result = g_malloc(2 * strlen(str) + 3);
+    dst = result;
+
+    if (anchor)
+        *dst++ = '^';
+
+    for (src = str; *src; src++) {
+        if (!g_ascii_isalnum((int) *src))
+            *dst++ = '\\';
+        *dst++ = *src;
     }
-    result[j] = '\0';
+
+    if (anchor)
+        *dst++ = '$';
+
+    *dst = '\0';
     return result;
 }
 
-int
-match(
-    const char *	regex,
-    const char *	str)
+/*
+ * Compile one regular expression. Return TRUE if the regex has been compiled
+ * successfully. Otherwise, return FALSE and copy the error message into the
+ * supplied regex_errbuf pointer. Also, we want to know whether flags should
+ * include REG_NEWLINE (See regcomp(3) for details). Since this is the more
+ * frequent case, add REG_NEWLINE to the default flags, and remove it only if
+ * match_newline is set to FALSE.
+ */
+
+static gboolean do_regex_compile(const char *str, regex_t *regex,
+    regex_errbuf *errbuf, gboolean match_newline)
 {
-    regex_t regc;
+    int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
     int result;
-    char errmsg[STR_SIZE];
 
-    if((result = regcomp(&regc, regex,
-			 REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("regex \"%s\": %s"), regex, errmsg);
-	/*NOTREACHED*/
-    }
+    if (!match_newline)
+	flags &= ~REG_NEWLINE;
 
-    if((result = regexec(&regc, str, 0, 0, 0)) != 0
-       && result != REG_NOMATCH) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("regex \"%s\": %s"), regex, errmsg);
-	/*NOTREACHED*/
-    }
+    result = regcomp(regex, str, flags);
 
-    regfree(&regc);
+    if (!result)
+        return TRUE;
 
-    return result == 0;
+    regerror(result, regex, *errbuf, sizeof(*errbuf));
+    return FALSE;
 }
 
-int
-match_no_newline(
-    const char *	regex,
-    const char *	str)
+/*
+ * Get an already compiled buffer from the regex cache. If the regex is not in
+ * the cache, allocate a new one and compile it using do_regex_compile(). If the
+ * compile fails, call regfree() on the object and return NULL to the caller. If
+ * it does succeed, put the regex buffer in cache and return a pointer to it.
+ */
+
+static regex_t *get_regex_from_cache(const char *re_str, regex_errbuf *errbuf,
+    gboolean match_newline)
 {
-    regex_t regc;
-    int result;
-    char errmsg[STR_SIZE];
+    regex_t *ret;
+    GHashTable *cache;
 
-    if((result = regcomp(&regc, regex,
-			 REG_EXTENDED|REG_NOSUB)) != 0) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("regex \"%s\": %s"), regex, errmsg);
-	/*NOTREACHED*/
-    }
+    g_static_mutex_lock(&re_cache_mutex);
 
-    if((result = regexec(&regc, str, 0, 0, 0)) != 0
-       && result != REG_NOMATCH) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("regex \"%s\": %s"), regex, errmsg);
-	/*NOTREACHED*/
+    init_regex_caches();
+
+    cache = (match_newline) ? regex_cache_newline: regex_cache;
+    ret = g_hash_table_lookup(cache, re_str);
+
+    if (ret)
+        goto out;
+
+    ret = g_new(regex_t, 1);
+
+    if (do_regex_compile(re_str, ret, errbuf, match_newline)) {
+        g_hash_table_insert(cache, g_strdup(re_str), ret);
+        goto out;
     }
 
-    regfree(&regc);
+    regfree(ret);
+    g_free(ret);
+    ret = NULL;
 
-    return result == 0;
+out:
+    g_static_mutex_unlock(&re_cache_mutex);
+    return ret;
 }
 
-char *
-validate_glob(
-    const char *	glob)
+/*
+ * Validate one regular expression using do_regex_compile(), and return NULL if
+ * the regex is valid, or the error message otherwise.
+ */
+
+char *validate_regexp(const char *regex)
 {
-    char *regex;
     regex_t regc;
-    int result;
-    static char errmsg[STR_SIZE];
+    static regex_errbuf errmsg;
+    gboolean valid;
 
-    regex = glob_to_regex(glob);
-    if ((result = regcomp(&regc, regex,
-			  REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
-      regerror(result, &regc, errmsg, SIZEOF(errmsg));
-      amfree(regex);
-      return errmsg;
-    }
+    valid = do_regex_compile(regex, &regc, &errmsg, TRUE);
 
     regfree(&regc);
-    amfree(regex);
+    return (valid) ? NULL : errmsg;
+}
 
-    return NULL;
+/*
+ * See if a string matches a compiled regular expression. Return one of MATCH_*
+ * defined above. If, for some reason, regexec() returns something other than
+ * 0 or REG_NOMATCH, return MATCH_ERROR and copy the error message into the
+ * supplied regex_errbuf.
+ */
+
+static int try_match(regex_t *regex, const char *str,
+    regex_errbuf *errbuf)
+{
+    int result = regexec(regex, str, 0, 0, 0);
+
+    switch(result) {
+        case 0:
+            return MATCH_OK;
+        case REG_NOMATCH:
+            return MATCH_NONE;
+        /* Fall through: something went really wrong */
+    }
+
+    regerror(result, regex, *errbuf, sizeof(*errbuf));
+    return MATCH_ERROR;
 }
 
-int
-match_glob(
-    const char *	glob,
-    const char *	str)
+/*
+ * Try and match a string against a regular expression, using
+ * get_regex_from_cache() and try_match(). Exit early if the regex didn't compile
+ * or there was an error during matching.
+ */
+
+int do_match(const char *regex, const char *str, gboolean match_newline)
 {
-    char *regex;
-    regex_t regc;
+    regex_t *re;
     int result;
-    char errmsg[STR_SIZE];
+    regex_errbuf errmsg;
 
-    regex = glob_to_regex(glob);
-    if((result = regcomp(&regc, regex,
-			 REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
-	/*NOTREACHED*/
+    re = get_regex_from_cache(regex, &errmsg, match_newline);
+
+    if (!re)
+        error("regex \"%s\": %s", regex, errmsg);
+        /*NOTREACHED*/
+
+    result = try_match(re, str, &errmsg);
+
+    if (result == MATCH_ERROR)
+        error("regex \"%s\": %s", regex, errmsg);
+        /*NOTREACHED*/
+
+    return result;
+}
+
+/*
+ * DISK/HOST EXPRESSION HANDLING
+ */
+
+/*
+ * Check whether a given character should be escaped (that is, prepended with a
+ * backslash), EXCEPT for one character.
+ */
+
+static gboolean should_be_escaped_except(char c, char not_this_one)
+{
+    if (c == not_this_one)
+        return FALSE;
+
+    switch (c) {
+        case '\\':
+        case '^':
+        case '$':
+        case '?':
+        case '*':
+        case '[':
+        case ']':
+        case '.':
+        case '/':
+            return TRUE;
     }
 
-    if((result = regexec(&regc, str, 0, 0, 0)) != 0
-       && result != REG_NOMATCH) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
-	/*NOTREACHED*/
+    return FALSE;
+}
+
+/*
+ * Take a disk/host expression and turn it into a full-blown amglob (with
+ * start and end anchors) following rules in amanda-match(7). The not_this_one
+ * argument represents a character which is NOT meant to be special in this
+ * case: '/' for disks and '.' for hosts.
+ */
+
+static char *full_amglob_from_expression(const char *str, char not_this_one)
+{
+    const char *src;
+    char *result, *dst;
+
+    result = g_malloc(2 * strlen(str) + 3);
+    dst = result;
+
+    *dst++ = '^';
+
+    for (src = str; *src; src++) {
+        if (should_be_escaped_except(*src, not_this_one))
+            *dst++ = '\\';
+        *dst++ = *src;
     }
 
-    regfree(&regc);
-    amfree(regex);
+    *dst++ = '$';
+    *dst = '\0';
+    return result;
+}
 
-    return result == 0;
+/*
+ * Turn a disk/host expression into an exact, fully anchored amglob (not a regex)
+ */
+
+char *make_exact_disk_expression(const char *disk)
+{
+    return full_amglob_from_expression(disk, '/');
 }
 
-char *
-glob_to_regex(
-    const char *	glob)
+char *make_exact_host_expression(const char *host)
 {
-    char *regex;
-    char *r;
-    size_t len;
-    int ch;
-    int last_ch;
+    return full_amglob_from_expression(host, '.');
+}
+
+/*
+ * GLOB HANDLING, as per amanda-match(7)
+ */
+
+/*
+ * Turn a glob into a regex.
+ */
+
+static char *amglob_to_regex(const char *str, const char *begin,
+    const char *end, struct subst_table *table)
+{
+    const char *src;
+    char *result, *dst;
+    char c;
+    size_t worst_case;
+    gboolean double_star = (table->double_star != NULL);
 
     /*
-     * Allocate an area to convert into.  The worst case is a five to
-     * one expansion.
+     * There are two particular cases when building a regex out of a glob:
+     * character classes (anything inside [...] or [!...]) and quotes (anything
+     * preceded by a backslash). We start with none being true.
      */
-    len = strlen(glob);
-    regex = alloc(1 + len * 5 + 1 + 1);
+
+    gboolean in_character_class = FALSE, in_quote = FALSE;
 
     /*
-     * Do the conversion:
-     *
-     *  ?      -> [^/]
-     *  *      -> [^/]*
-     *  [!...] -> [^...]
+     * Allocate enough space for our string. At worst, the allocated space is
+     * the length of the following:
+     * - beginning of regex;
+     * - size of original string multiplied by worst-case expansion;
+     * - end of regex;
+     * - final 0.
      *
-     * The following are given a leading backslash to protect them
-     * unless they already have a backslash:
-     *
-     *   ( ) { } + . ^ $ |
-     *
-     * Put a leading ^ and trailing $ around the result.  If the last
-     * non-escaped character is \ leave the $ off to cause a syntax
-     * error when the regex is compiled.
+     * Calculate the worst case expansion by walking our struct subst_table.
      */
 
-    r = regex;
-    *r++ = '^';
-    last_ch = '\0';
-    for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
-	if (last_ch == '\\') {
-	    *r++ = (char)ch;
-	    ch = '\0';			/* so last_ch != '\\' next time */
-	} else if (last_ch == '[' && ch == '!') {
-	    *r++ = '^';
-	} else if (ch == '\\') {
-	    *r++ = (char)ch;
-	} else if (ch == '*' || ch == '?') {
-	    *r++ = '[';
-	    *r++ = '^';
-	    *r++ = '/';
-	    *r++ = ']';
-	    if (ch == '*') {
-		*r++ = '*';
-	    }
-	} else if (ch == '('
-		   || ch == ')'
-		   || ch == '{'
-		   || ch == '}'
-		   || ch == '+'
-		   || ch == '.'
-		   || ch == '^'
-		   || ch == '$'
-		   || ch == '|') {
-	    *r++ = '\\';
-	    *r++ = (char)ch;
-	} else {
-	    *r++ = (char)ch;
-	}
-    }
-    if (last_ch != '\\') {
-	*r++ = '$';
+    worst_case = strlen(table->question_mark);
+
+    if (worst_case < strlen(table->star))
+        worst_case = strlen(table->star);
+
+    if (double_star && worst_case < strlen(table->double_star))
+        worst_case = strlen(table->double_star);
+
+    result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1);
+
+    /*
+     * Start by copying the beginning of the regex...
+     */
+
+    dst = g_stpcpy(result, begin);
+
+    /*
+     * ... Now to the meat of it.
+     */
+
+    for (src = str; *src; src++) {
+        c = *src;
+
+        /*
+         * First, check whether we're in a character class: if so, each and every
+         * character can be copied as is. We only need to be careful if the
+         * character is a closing bracket: it will end the character class IF
+         * AND ONLY IF it is not preceded by a backslash.
+         */
+
+        if (in_character_class) {
+            in_character_class = ((c != ']') || (*(src - 1) == '\\'));
+            goto straight_copy;
+        }
+
+        /*
+         * Are we in a quote? If yes, it is really simple: copy the current
+         * character, close the quote, the end.
+         */
+
+        if (in_quote) {
+            in_quote = FALSE;
+            goto straight_copy;
+        }
+
+        /*
+         * The only thing left to handle now is the "normal" case: we are not in
+         * a character class nor in a quote.
+         */
+
+        if (c == '\\') {
+            /*
+             * Backslash: append it, and open a new quote.
+             */
+            in_quote = TRUE;
+            goto straight_copy;
+        } else if (c == '[') {
+            /*
+             * Opening bracket: the beginning of a character class.
+             *
+             * Look ahead at the next character: if it's an exclamation mark, then
+             * this is a complemented character class; append a caret to make
+             * the result string regex-friendly, and skip over the exclamation
+             * mark.
+             */
+            *dst++ = c;
+            in_character_class = TRUE;
+            if (*(src + 1) == '!') {
+                *dst++ = '^';
+                src++;
+            }
+        } else if (IS_REGEX_META(c)) {
+            /*
+             * Regex metacharacter (except for ? and *, see below): append a
+             * backslash, and then the character itself.
+             */
+            *dst++ = '\\';
+            goto straight_copy;
+        } else if (c == '?')
+            /*
+             * Question mark: take the substitution string out of our subst_table
+             * and append it to the string.
+             */
+            dst = g_stpcpy(dst, table->question_mark);
+        else if (c == '*') {
+            /*
+             * Star: append the substitution string found in our subst_table.
+             * However, look ahead at the next character: if it's yet another
+             * star, then see if there is a substitution string for the double
+             * star and append this one instead.
+             *
+             * FIXME: this means that two consecutive stars in a glob string
+             * where there is no substitution for double_star can lead to
+             * exponential regex execution time: consider [^/]*[^/]*.
+             */
+            const char *p = table->star;
+            if (double_star && *(src + 1) == '*') {
+                src++;
+                p = table->double_star;
+            }
+            dst = g_stpcpy(dst, p);
+        } else {
+            /*
+             * Any other character: append each time.
+             */
+straight_copy:
+            *dst++ = c;
+        }
     }
-    *r = '\0';
 
-    return regex;
+    /*
+     * Done, now append the end, ONLY if we are not in a quote - a lone
+     * backslash at the end of a glob is illegal, just leave it as is, it will
+     * make the regex compile fail.
+     */
+
+    if (!in_quote)
+        dst = g_stpcpy(dst, end);
+    /*
+     * Finalize, return.
+     */
+
+    *dst = '\0';
+    return result;
 }
 
+/*
+ * File globs
+ */
 
-int
-match_tar(
-    const char *	glob,
-    const char *	str)
+char *glob_to_regex(const char *glob)
+{
+    return amglob_to_regex(glob, "^", "$", &glob_subst_stable);
+}
+
+int match_glob(const char *glob, const char *str)
 {
     char *regex;
-    regex_t regc;
+    regex_t *re;
     int result;
-    char errmsg[STR_SIZE];
+    regex_errbuf errmsg;
 
-    regex = tar_to_regex(glob);
-    if((result = regcomp(&regc, regex,
-			 REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
-	/*NOTREACHED*/
-    }
+    regex = glob_to_regex(glob);
+    re = get_regex_from_cache(regex, &errmsg, TRUE);
 
-    if((result = regexec(&regc, str, 0, 0, 0)) != 0
-       && result != REG_NOMATCH) {
-        regerror(result, &regc, errmsg, SIZEOF(errmsg));
-	error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
-	/*NOTREACHED*/
-    }
+    if (!re)
+        error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
+        /*NOTREACHED*/
+
+    result = try_match(re, str, &errmsg);
+
+    if (result == MATCH_ERROR)
+        error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
+        /*NOTREACHED*/
+
+    g_free(regex);
+
+    return result;
+}
+
+char *validate_glob(const char *glob)
+{
+    char *regex, *ret = NULL;
+    regex_t regc;
+    static regex_errbuf errmsg;
+
+    regex = glob_to_regex(glob);
+
+    if (!do_regex_compile(regex, &regc, &errmsg, TRUE))
+        ret = errmsg;
 
     regfree(&regc);
-    amfree(regex);
+    g_free(regex);
+    return ret;
+}
+
+/*
+ * Tar globs
+ */
 
-    return result == 0;
+static char *tar_to_regex(const char *glob)
+{
+    return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable);
 }
 
-char *
-tar_to_regex(
-    const char *	glob)
+int match_tar(const char *glob, const char *str)
 {
     char *regex;
-    char *r;
-    size_t len;
-    int ch;
-    int last_ch;
+    regex_t *re;
+    int result;
+    regex_errbuf errmsg;
+
+    regex = tar_to_regex(glob);
+    re = get_regex_from_cache(regex, &errmsg, TRUE);
+
+    if (!re)
+        error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
+        /*NOTREACHED*/
+
+    result = try_match(re, str, &errmsg);
+
+    if (result == MATCH_ERROR)
+        error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
+        /*NOTREACHED*/
+
+    g_free(regex);
+
+    return result;
+}
+
+/*
+ * DISK/HOST MATCHING
+ *
+ * The functions below wrap input strings with separators and attempt to match
+ * the result. The core of the operation is the match_word() function.
+ */
+
+/*
+ * Check whether a glob passed as an argument to match_word() only looks for the
+ * separator
+ */
+
+static gboolean glob_is_separator_only(const char *glob, char sep) {
+    size_t len = strlen(glob);
+    const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
+        len3[] = { '^', sep, '$', 0 };
+
+    switch (len) {
+        case 1:
+            return (*glob == sep);
+        case 2:
+            return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2));
+        case 3:
+            return g_str_equal(glob, len3);
+        default:
+            return FALSE;
+    }
+}
+
+/*
+ * Given a word and a separator as an argument, wrap the word with separators -
+ * if need be. For instance, if '/' is the separator, the rules are:
+ *
+ * - "" -> "/"
+ * - "/" -> "//"
+ * - "//" -> left alone
+ * - "xxx" -> "/xxx/"
+ * - "/xxx" -> "/xxx/"
+ * - "xxx/" -> "/xxx/"
+ * - "/xxx/" -> left alone
+ *
+ * (xxx may itself contain the separator; the leading separator is not added if
+ * glob starts with '^', nor the trailing one if glob ends with '$')
+ * Note that the returned string is dynamically allocated: it is up to the
+ * caller to free it. Note also that the first argument MUST NOT BE NULL.
+ */
+
+static char *wrap_word(const char *word, const char separator, const char *glob)
+{
+    size_t len = strlen(word);
+    size_t len_glob = strlen(glob);
+    char *result, *p;
 
     /*
-     * Allocate an area to convert into.  The worst case is a five to
-     * one expansion.
+     * We allocate for the worst case, which is two bytes more than the input
+     * (have to prepend and append a separator).
      */
-    len = strlen(glob);
-    regex = alloc(1 + len * 5 + 1 + 1);
+    result = g_malloc(len + 3);
+    p = result;
 
     /*
-     * Do the conversion:
-     *
-     *  ?      -> [^/]
-     *  *      -> .*
-     *  [!...] -> [^...]
-     *
-     * The following are given a leading backslash to protect them
-     * unless they already have a backslash:
-     *
-     *   ( ) { } + . ^ $ |
-     *
-     * Put a leading ^ and trailing $ around the result.  If the last
-     * non-escaped character is \ leave the $ off to cause a syntax
-     * error when the regex is compiled.
+     * Zero-length: separator only
      */
 
-    r = regex;
-    *r++ = '^';
-    last_ch = '\0';
-    for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) {
-	if (last_ch == '\\') {
-	    *r++ = (char)ch;
-	    ch = '\0';			/* so last_ch != '\\' next time */
-	} else if (last_ch == '[' && ch == '!') {
-	    *r++ = '^';
-	} else if (ch == '\\') {
-	    *r++ = (char)ch;
-	} else if (ch == '*') {
-	    *r++ = '.';
-	    *r++ = '*';
-	} else if (ch == '?') {
-	    *r++ = '[';
-	    *r++ = '^';
-	    *r++ = '/';
-	    *r++ = ']';
-	} else if (ch == '('
-		   || ch == ')'
-		   || ch == '{'
-		   || ch == '}'
-		   || ch == '+'
-		   || ch == '.'
-		   || ch == '^'
-		   || ch == '$'
-		   || ch == '|') {
-	    *r++ = '\\';
-	    *r++ = (char)ch;
-	} else {
-	    *r++ = (char)ch;
-	}
+    if (len == 0) {
+        *p++ = separator;
+        goto out;
     }
-    if (last_ch != '\\') {
-	*r++ = '$';
+
+    /*
+     * Length is one: if the only character is the separator, the result
+     * string is two separators
+     */
+
+    if (len == 1 && word[0] == separator) {
+        *p++ = separator;
+        *p++ = separator;
+        goto out;
     }
-    *r = '\0';
 
-    return regex;
-}
+    /*
+     * Otherwise: prepend the separator if needed, append the separator if
+     * needed.
+     */
 
+    if (word[0] != separator && glob[0] != '^')
+        *p++ = separator;
 
-static int
-match_word(
-    const char *	glob,
-    const char *	word,
-    const char		separator)
+    p = g_stpcpy(p, word);
+
+    if (word[len - 1] != separator && glob[len_glob-1] != '$')
+        *p++ = separator;
+
+out:
+    *p++ = '\0';
+    return result;
+}
+
+static int match_word(const char *glob, const char *word, const char separator)
 {
-    char *regex;
-    char *r;
-    size_t  len;
-    int  ch;
-    int  last_ch;
-    int  next_ch;
-    size_t  lenword;
-    char *nword;
-    char *nglob;
-    char *g; 
-    const char *w;
-    int  i;
-
-    lenword = strlen(word);
-    nword = (char *)alloc(lenword + 3);
-
-    r = nword;
-    w = word;
-    if(lenword == 1 && *w == separator) {
-	*r++ = separator;
-	*r++ = separator;
-    }
-    else {
-	if(*w != separator)
-	    *r++ = separator;
-	while(*w != '\0')
-	    *r++ = *w++;
-	if(*(r-1) != separator)
-	    *r++ = separator;    
-    }
-    *r = '\0';
+    char *wrapped_word = wrap_word(word, separator, glob);
+    struct mword_regexes *regexes = &mword_slash_regexes;
+    struct subst_table *table = &mword_slash_subst_table;
+    gboolean not_slash = (separator != '/');
+    int ret;
 
     /*
-     * Allocate an area to convert into.  The worst case is a six to
-     * one expansion.
+     * We only expect two separators: '/' or '.'. If it's not '/', it has to be
+     * the other one...
      */
-    len = strlen(glob);
-    regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2);
-    r = regex;
-    nglob = stralloc(glob);
-    g = nglob;
-
-    if((len == 1 && nglob[0] == separator) ||
-       (len == 2 && nglob[0] == '^' && nglob[1] == separator) ||
-       (len == 2 && nglob[0] == separator && nglob[1] == '$') ||
-       (len == 3 && nglob[0] == '^' && nglob[1] == separator &&
-        nglob[2] == '$')) {
-	*r++ = '^';
-	*r++ = '\\';
-	*r++ = separator;
-	*r++ = '\\';
-	*r++ = separator;
-	*r++ = '$';
+    if (not_slash) {
+        regexes = &mword_dot_regexes;
+        table = &mword_dot_subst_table;
     }
-    else {
-	/*
-	 * Do the conversion:
-	 *
-	 *  ?      -> [^\separator]
-	 *  *      -> [^\separator]*
-	 *  [!...] -> [^...]
-	 *  **     -> .*
-	 *
-	 * The following are given a leading backslash to protect them
-	 * unless they already have a backslash:
-	 *
-	 *   ( ) { } + . ^ $ |
-	 *
-	 * If the last
-	 * non-escaped character is \ leave it to cause a syntax
-	 * error when the regex is compiled.
-	 */
-
-	if(*g == '^') {
-	    *r++ = '^';
-	    *r++ = '\\';	/* escape the separator */
-	    *r++ = separator;
-	    g++;
-	    if(*g == separator) g++;
-	}
-	else if(*g != separator) {
-	    *r++ = '\\';	/* add a leading \separator */
-	    *r++ = separator;
-	}
-	last_ch = '\0';
-	for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) {
-	    next_ch = *g;
-	    if (last_ch == '\\') {
-		*r++ = (char)ch;
-		ch = '\0';		/* so last_ch != '\\' next time */
-	    } else if (last_ch == '[' && ch == '!') {
-		*r++ = '^';
-	    } else if (ch == '\\') {
-		*r++ = (char)ch;
-	    } else if (ch == '*' || ch == '?') {
-		if(ch == '*' && next_ch == '*') {
-		    *r++ = '.';
-		    g++;
-		}
-		else {
-		    *r++ = '[';
-		    *r++ = '^';
-		    *r++ = '\\';
-		    *r++ = separator;
-		    *r++ = ']';
-		}
-		if (ch == '*') {
-		    *r++ = '*';
-		}
-	    } else if (ch == '$' && next_ch == '\0') {
-		if(last_ch != separator) {
-		    *r++ = '\\';
-		    *r++ = separator;
+
+    if(glob_is_separator_only(glob, separator)) {
+        ret = do_match(regexes->re_double_sep, wrapped_word, TRUE);
+        goto out;
+    } else {
+        /*
+         * Unlike what happens for file and tar globs, we need to
+         * calculate the beginning and end of our regex before calling
+         * amglob_to_regex().
+         */
+
+        const char *begin, *end;
+        char *glob_copy = g_strdup(glob);
+        char *p, *g = glob_copy;
+        char *regex;
+
+        /*
+         * Calculate the beginning of the regex:
+         * - by default, it is an unanchored separator;
+         * - if the glob begins with a caret, anchor at the beginning (including the
+         *   separator if one follows the caret), and advance g past both;
+         * - if it begins with a separator, make it the empty string.
+         */
+
+        p = glob_copy;
+        begin = regexes->re_separator;
+
+        if (*p == '^') {
+            begin = "^";
+            p++, g++;
+            if (*p == separator) {
+		begin = regexes->re_begin_full;
+                g++;
+	    }
+        } else if (*p == separator)
+            begin = "";
+
+        /*
+         * Calculate the end of the regex:
+         * - an unanchored separator by default;
+         * - if the last character is a backslash or the separator itself, it
+         *   should be the empty string;
+         * - if it is a dollar sign, overwrite it with 0 and look at the
+         *   character before it: if it is the separator, strip it (and a backslash
+         *   preceding it) and end with separator + anchor; otherwise only anchor.
+         */
+
+        p = &(glob_copy[strlen(glob_copy) - 1]);
+        end = regexes->re_separator;
+        if (*p == '\\' || *p == separator) {
+            end = "";
+        } else if (*p == '$') {
+            char prev = *(p - 1);
+            *p = '\0';
+	    if (prev == separator) {
+		*(p-1) = '\0';
+		if (p-2 >= glob_copy) {
+		    prev = *(p - 2);
+		    if (prev == '\\') {
+			*(p-2) = '\0';
+		    }
 		}
-		*r++ = (char)ch;
-	    } else if (   ch == '('
-		       || ch == ')'
-		       || ch == '{'
-		       || ch == '}'
-		       || ch == '+'
-		       || ch == '.'
-		       || ch == '^'
-		       || ch == '$'
-		       || ch == '|') {
-		*r++ = '\\';
-		*r++ = (char)ch;
+		end = regexes->re_end_full;
 	    } else {
-		*r++ = (char)ch;
+		end = "$";
 	    }
-	}
-	if(last_ch != '\\') {
-	    if(last_ch != separator && last_ch != '$') {
-		*r++ = '\\';
-		*r++ = separator;		/* add a trailing \separator */
-	    }
-	}
-    }
-    *r = '\0';
+        }
+
+        regex = amglob_to_regex(g, begin, end, table);
+        ret = do_match(regex, wrapped_word, TRUE);
 
-    i = match(regex,nword);
+        g_free(glob_copy);
+        g_free(regex);
+    }
 
-    amfree(nword);
-    amfree(nglob);
-    amfree(regex);
-    return i;
+out:
+    g_free(wrapped_word);
+    return ret;
 }
 
+/*
+ * Match a host expression
+ */
 
-int
-match_host(
-    const char *	glob,
-    const char *	host)
+int match_host(const char *glob, const char *host)
 {
     char *lglob, *lhost;
-    char *c;
-    const char *d;
-    int i;
-
-    
-    lglob = (char *)alloc(strlen(glob)+1);
-    c = lglob, d=glob;
-    while( *d != '\0')
-	*c++ = (char)tolower(*d++);
-    *c = *d;
-
-    lhost = (char *)alloc(strlen(host)+1);
-    c = lhost, d=host;
-    while( *d != '\0')
-	*c++ = (char)tolower(*d++);
-    *c = *d;
-
-    i = match_word(lglob, lhost, (int)'.');
-    amfree(lglob);
-    amfree(lhost);
-    return i;
+    int matched;
+
+    /* A leading '=' asks for an exact, case-sensitive string comparison. */
+    if (glob[0] == '=')
+        return strcmp(glob + 1, host) == 0;
+
+    /* Otherwise compare ASCII-lowercased copies, with '.' as separator. */
+    lglob = g_ascii_strdown(glob, -1);
+    lhost = g_ascii_strdown(host, -1);
+    matched = match_word(lglob, lhost, '.');
+    g_free(lglob);
+    g_free(lhost);
+    return matched;
 }
 
+/*
+ * Match a disk expression. Not as straightforward, since Windows paths must be
+ * accounted for.
+ */
 
-int
-match_disk(
-    const char *	glob,
-    const char *	disk)
+/*
+ * Convert a disk and glob from Windows-style paths (backslashes) into
+ * Unix-style paths (slashes).
+ *
+ * Note: the resulting string is dynamically allocated, it is up to the caller
+ * to free it.
+ *
+ * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
+ */
+
+static char *convert_unc_to_unix(const char *unc)
+{
+    /* Duplicate first: g_strdelimit() edits its argument in place. */
+    return g_strdelimit(g_strdup(unc), "\\", '/');
+}
+
+static char *convert_winglob_to_unix(const char *glob)
 {
-    return match_word(glob, disk, '/');
+    /* The output never outgrows the input, so strlen(glob) + 1 suffices. */
+    char *unix_glob = g_malloc(strlen(glob) + 1);
+    size_t in = 0, out = 0;
+    while (glob[in] != '\0') {
+        if (glob[in] == '\\' && glob[in + 1] == '\\') {
+            /* Two consecutive backslashes collapse into a single slash. */
+            unix_glob[out++] = '/';
+            in += 2;
+        } else {
+            /* Everything else, a lone backslash included, is kept. */
+            unix_glob[out++] = glob[in++];
+        }
+    }
+    unix_glob[out] = '\0';
+    return unix_glob;
 }
 
+/*
+ * Match a disk expression
+ */
+
+int match_disk(const char *glob, const char *disk)
+{
+    char *unix_glob = NULL, *unix_disk = NULL;
+    gboolean is_share;
+    int matched;
+
+    /*
+     * A disk is treated as a Windows share when it starts with two
+     * backslashes and contains no forward slash anywhere. This is
+     * evaluated before the '=' shortcut below is considered.
+     */
+
+    is_share = strncmp(disk, "\\\\", 2) == 0 && strchr(disk, '/') == NULL;
+
+    /* A leading '=' requests a plain, exact string comparison. */
+    if (glob[0] == '=')
+        return strcmp(glob + 1, disk) == 0;
+
+    /* Shares are matched on Unix-style copies; anything else as given. */
+    if (is_share) {
+        unix_glob = convert_winglob_to_unix(glob);
+        unix_disk = convert_unc_to_unix(disk);
+        matched = match_word(unix_glob, unix_disk, '/');
+    } else {
+        matched = match_word(glob, disk, '/');
+    }
+
+    /*
+     * Both pointers are still NULL in the non-share case, which g_free()
+     * accepts, so no guard is needed here.
+     */
+    g_free(unix_glob);
+    g_free(unix_disk);
+    return matched;
+}
+
+/*
+ * TIMESTAMPS/LEVEL MATCHING
+ */
+
 static int
 alldigits(
     const char *str)
@@ -584,7 +993,11 @@ match_datestamp(
     if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
 	goto illegal;
     }
-   
+
+    if (*dateexp == '=') {
+	return strcmp(dateexp+1, datestamp) == 0;
+    }
+
     /* strip and ignore an initial "^" */
     if(dateexp[0] == '^') {
 	strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
@@ -595,6 +1008,10 @@ match_datestamp(
 	mydateexp[sizeof(mydateexp)-1] = '\0';
     }
 
+    if(strlen(mydateexp) < 1) {	/* "^" alone leaves nothing to match */
+	goto illegal;
+    }
+
     if(mydateexp[strlen(mydateexp)-1] == '$') {
 	match_exact = 1;
 	mydateexp[strlen(mydateexp)-1] = '\0';	/* strip the trailing $ */
@@ -613,6 +1030,9 @@ match_datestamp(
 	len = (size_t)(dash - mydateexp);   /* length of XXXYYYY */
 	len_suffix = strlen(dash) - 1;	/* length of ZZZZ */
 	if (len_suffix > len) goto illegal;
+	if (len < len_suffix) {
+	    goto illegal;
+	}
 	len_prefix = len - len_suffix; /* length of XXX */
 
 	dash++;
@@ -633,14 +1053,14 @@ match_datestamp(
 	if (!alldigits(mydateexp))
 	    goto illegal;
 	if(match_exact == 1) {
-	    return (strcmp(datestamp, mydateexp) == 0);
+	    return (g_str_equal(datestamp, mydateexp));
 	}
 	else {
-	    return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
+	    return (g_str_has_prefix(datestamp, mydateexp));
 	}
     }
 illegal:
-	error(_("Illegal datestamp expression %s"),dateexp);
+	error("Illegal datestamp expression %s", dateexp);
 	/*NOTREACHED*/
 }
 
@@ -651,57 +1071,67 @@ match_level(
     const char *	level)
 {
     char *dash;
-    size_t len, len_suffix;
-    size_t len_prefix;
-    char lowend[100], highend[100];
+    long int low, hi, level_i;
     char mylevelexp[100];
     int match_exact;
 
     if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
-	error(_("Illegal level expression %s"),levelexp);
+	error("Illegal level expression %s", levelexp);
 	/*NOTREACHED*/
     }
-   
+
+    if (*levelexp == '=') {
+	return strcmp(levelexp+1, level) == 0;
+    }
+
     if(levelexp[0] == '^') {
-	strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1); 
+	strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
 	mylevelexp[strlen(levelexp)-1] = '\0';
+	if (strlen(mylevelexp) == 0) {	/* "^" alone leaves nothing to match */
+	    error("Illegal level expression %s", levelexp);
+	    /*NOTREACHED*/
+	}
     }
     else {
 	strncpy(mylevelexp, levelexp, strlen(levelexp));
 	mylevelexp[strlen(levelexp)] = '\0';
     }
 
-    if(mylevelexp[strlen(mylevelexp)] == '$') {
+    if(mylevelexp[strlen(mylevelexp)-1] == '$') {
 	match_exact = 1;
-	mylevelexp[strlen(mylevelexp)] = '\0';
+	mylevelexp[strlen(mylevelexp)-1] = '\0';
     }
     else
 	match_exact = 0;
 
     if((dash = strchr(mylevelexp,'-'))) {
 	if(match_exact == 1) {
-	    error(_("Illegal level expression %s"),levelexp);
-	    /*NOTREACHED*/
+            goto illegal;
 	}
-	len = (size_t)(dash - mylevelexp);
-	len_suffix = strlen(dash) - 1;
-	len_prefix = len - len_suffix;
 
-	dash++;
-	strncpy(lowend, mylevelexp, len);
-	lowend[len] = '\0';
-	strncpy(highend, mylevelexp, len_prefix);
-	strncpy(&(highend[len_prefix]), dash, len_suffix);
-	highend[len] = '\0';
-	return ((strncmp(level, lowend, strlen(lowend)) >= 0) &&
-		(strncmp(level, highend , strlen(highend))  <= 0));
+        *dash = '\0';
+        if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;
+
+        errno = 0;
+        low = strtol(mylevelexp, (char **) NULL, 10);
+        if (errno) goto illegal;
+        hi = strtol(dash+1, (char **) NULL, 10);
+        if (errno) goto illegal;
+        level_i = strtol(level, (char **) NULL, 10);
+        if (errno) goto illegal;
+
+	return ((level_i >= low) && (level_i <= hi));
     }
     else {
+	if (!alldigits(mylevelexp)) goto illegal;
 	if(match_exact == 1) {
-	    return (strcmp(level, mylevelexp) == 0);
+	    return (g_str_equal(level, mylevelexp));
 	}
 	else {
-	    return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0);
+	    return (g_str_has_prefix(level, mylevelexp));
 	}
     }
+illegal:
+    error("Illegal level expression %s", levelexp);
+    /*NOTREACHED*/
 }