X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=common-src%2Fmatch.c;h=328097c4334a9e1604fa02aa3b183f45a1c114f5;hb=691567b16c13087b31ee4c2b6d038e57872fae82;hp=d8c83bcc7657ec16014ade4c0f35587bc4e20826;hpb=afaa71b3866b46b082b6c895772e15b36d8865fe;p=debian%2Famanda diff --git a/common-src/match.c b/common-src/match.c index d8c83bc..328097c 100644 --- a/common-src/match.c +++ b/common-src/match.c @@ -1,6 +1,7 @@ /* * Amanda, The Advanced Maryland Automatic Network Disk Archiver * Copyright (c) 1991-1998 University of Maryland at College Park + * Copyright (c) 2007-2012 Zmanda, Inc. All Rights Reserved. * All Rights Reserved. * * Permission to use, copy, modify, distribute, and sell this software and its @@ -23,541 +24,949 @@ * Authors: the Amanda Development Team. Its members are listed in a * file named AUTHORS, in the root directory of this distribution. */ + /* - * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $ - * - * functions for checking and matching regular expressions + * See match.h for function prototypes and further explanations. */ #include "amanda.h" +#include "match.h" #include <regex.h> -static int match_word(const char *glob, const char *word, const char separator); +/* + * DATA STRUCTURES, MACROS, STATIC DATA + */ + +/* + * Return codes used by try_match() + */ + +#define MATCH_OK (1) +#define MATCH_NONE (0) +#define MATCH_ERROR (-1) + +/* + * Macro to tell whether a character is a regex metacharacter. Note that '*' + * and '?' are NOT included: they are themselves special in globs. + */ + +#define IS_REGEX_META(c) ( \ + (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \ + (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \ +) + +/* + * Define a specific type to hold error messages in case regex compile/matching + * fails + */ + +typedef char regex_errbuf[STR_SIZE]; + +/* + * Structure used by amglob_to_regex() to expand particular glob characters.
Its + * fields are: + * - question_mark: what the question mark ('?') should be replaced with; + * - star: what the star ('*') should be replaced with; + * - double_star: what two consecutive stars should be replaced with. + * + * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL. + */ + +struct subst_table { + const char *question_mark; + const char *star; + const char *double_star; +}; + +/* + * Substitution data for glob_to_regex() + */ + +static struct subst_table glob_subst_stable = { + "[^/]", /* question_mark */ + "[^/]*", /* star */ + NULL /* double_star */ +}; + +/* + * Substitution data for tar_to_regex() + */ + +static struct subst_table tar_subst_stable = { + "[^/]", /* question_mark */ + ".*", /* star */ + NULL /* double_star */ +}; + +/* + * Substitution data for match_word(): dot + */ + +static struct subst_table mword_dot_subst_table = { + "[^.]", /* question_mark */ + "[^.]*", /* star */ + ".*" /* double_star */ +}; + +/* + * Substitution data for match_word(): slash + */ + +static struct subst_table mword_slash_subst_table = { + "[^/]", /* question_mark */ + "[^/]*", /* star */ + ".*" /* double_star */ +}; + +/* + * match_word() specific data: + * - re_double_sep: anchored regex matching two separators; + * - re_separator: regex matching the separator; + * - re_begin_full: regex matching the separator, anchored at the beginning; + * - re_end_full: regex matching the separator, anchored at the end.
+ */ + +struct mword_regexes { + const char *re_double_sep; + const char *re_begin_full; + const char *re_separator; + const char *re_end_full; +}; + +static struct mword_regexes mword_dot_regexes = { + "^\\.\\.$", /* re_double_sep */ + "^\\.", /* re_begin_full */ + "\\.", /* re_separator */ + "\\.$" /* re_end_full */ +}; + +static struct mword_regexes mword_slash_regexes = { + "^\\/\\/$", /* re_double_sep */ + "^\\/", /* re_begin_full */ + "\\/", /* re_separator */ + "\\/$" /* re_end_full */ +}; + +/* + * Regular expression caches, and a static mutex to protect initialization and + * access. This may be unnecessarily coarse, but it is unknown at this time + * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may + * be called from within threads, so play it safe. + */ + +#if (GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 31)) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" + static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT; +# pragma GCC diagnostic pop +#else + static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT; +#endif +static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL; + +/* + * REGEX FUNCTIONS + */ -char * -validate_regexp( - const char * regex) +/* + * Initialize regex caches. 
NOTE: this function MUST be called with + * re_cache_mutex LOCKED, see get_regex_from_cache() + */ + +static void init_regex_caches(void) { - regex_t regc; - int result; - static char errmsg[STR_SIZE]; + static gboolean initialized = FALSE; - if ((result = regcomp(&regc, regex, - REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) { - regerror(result, &regc, errmsg, SIZEOF(errmsg)); - return errmsg; - } + if (initialized) + return; - regfree(&regc); + regex_cache = g_hash_table_new(g_str_hash, g_str_equal); + regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal); - return NULL; + initialized = TRUE; } -char * -clean_regex( - const char * regex) +/* + * Cleanup a regular expression by escaping all non alphanumeric characters, and + * append beginning/end anchors if need be + */ + +char *clean_regex(const char *str, gboolean anchor) { - char *result; - int j; - size_t i; - result = alloc(2*strlen(regex)+1); - - for(i=0,j=0;i regex \"%s\": %s"), glob, regex, errmsg); - /*NOTREACHED*/ + re = get_regex_from_cache(regex, &errmsg, match_newline); + + if (!re) + error("regex \"%s\": %s", regex, errmsg); + /*NOTREACHED*/ + + result = try_match(re, str, &errmsg); + + if (result == MATCH_ERROR) + error("regex \"%s\": %s", regex, errmsg); + /*NOTREACHED*/ + + return result; +} + +/* + * DISK/HOST EXPRESSION HANDLING + */ + +/* + * Check whether a given character should be escaped (that is, prepended with a + * backslash), EXCEPT for one character.
+ */ + +static gboolean should_be_escaped_except(char c, char not_this_one) +{ + if (c == not_this_one) + return FALSE; + + switch (c) { + case '\\': + case '^': + case '$': + case '?': + case '*': + case '[': + case ']': + case '.': + case '/': + return TRUE; } - if((result = regexec(&regc, str, 0, 0, 0)) != 0 - && result != REG_NOMATCH) { - regerror(result, &regc, errmsg, SIZEOF(errmsg)); - error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg); - /*NOTREACHED*/ + return FALSE; +} + +/* + * Take a disk/host expression and turn it into a full-blown amglob (with + * start and end anchors) following rules in amanda-match(7). The not_this_one + * argument represents a character which is NOT meant to be special in this + * case: '/' for disks and '.' for hosts. + */ + +static char *full_amglob_from_expression(const char *str, char not_this_one) +{ + const char *src; + char *result, *dst; + + result = g_malloc(2 * strlen(str) + 3); + dst = result; + + *dst++ = '^'; + + for (src = str; *src; src++) { + if (should_be_escaped_except(*src, not_this_one)) + *dst++ = '\\'; + *dst++ = *src; } - regfree(&regc); - amfree(regex); + *dst++ = '$'; + *dst = '\0'; + return result; +} - return result == 0; +/* + * Turn a disk/host expression into a regex + */ + +char *make_exact_disk_expression(const char *disk) +{ + return full_amglob_from_expression(disk, '/'); } -char * -glob_to_regex( - const char * glob) +char *make_exact_host_expression(const char *host) { - char *regex; - char *r; - size_t len; - int ch; - int last_ch; + return full_amglob_from_expression(host, '.'); +} + +/* + * GLOB HANDLING, as per amanda-match(7) + */ + +/* + * Turn a glob into a regex. + */ + +static char *amglob_to_regex(const char *str, const char *begin, + const char *end, struct subst_table *table) +{ + const char *src; + char *result, *dst; + char c; + size_t worst_case; + gboolean double_star = (table->double_star != NULL); /* - * Allocate an area to convert into.
The worst case is a five to - * one expansion. + * There are two particular cases when building a regex out of a glob: + * character classes (anything inside [...] or [!...] and quotes (anything + * preceded by a backslash). We start with none being true. */ - len = strlen(glob); - regex = alloc(1 + len * 5 + 1 + 1); + + gboolean in_character_class = FALSE, in_quote = FALSE; /* - * Do the conversion: - * - * ? -> [^/] - * * -> [^/]* - * [!...] -> [^...] + * Allocate enough space for our string. At worst, the allocated space is + * the length of the following: + * - beginning of regex; + * - size of original string multiplied by worst-case expansion; + * - end of regex; + * - final 0. * - * The following are given a leading backslash to protect them - * unless they already have a backslash: - * - * ( ) { } + . ^ $ | - * - * Put a leading ^ and trailing $ around the result. If the last - * non-escaped character is \ leave the $ off to cause a syntax - * error when the regex is compiled. + * Calculate the worst case expansion by walking our struct subst_table. */ - r = regex; - *r++ = '^'; - last_ch = '\0'; - for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) { - if (last_ch == '\\') { - *r++ = (char)ch; - ch = '\0'; /* so last_ch != '\\' next time */ - } else if (last_ch == '[' && ch == '!') { - *r++ = '^'; - } else if (ch == '\\') { - *r++ = (char)ch; - } else if (ch == '*' || ch == '?') { - *r++ = '['; - *r++ = '^'; - *r++ = '/'; - *r++ = ']'; - if (ch == '*') { - *r++ = '*'; - } - } else if (ch == '(' - || ch == ')' - || ch == '{' - || ch == '}' - || ch == '+' - || ch == '.' 
- || ch == '^' - || ch == '$' - || ch == '|') { - *r++ = '\\'; - *r++ = (char)ch; - } else { - *r++ = (char)ch; - } - } - if (last_ch != '\\') { - *r++ = '$'; + worst_case = strlen(table->question_mark); + + if (worst_case < strlen(table->star)) + worst_case = strlen(table->star); + + if (double_star && worst_case < strlen(table->double_star)) + worst_case = strlen(table->double_star); + + result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1); + + /* + * Start by copying the beginning of the regex... + */ + + dst = g_stpcpy(result, begin); + + /* + * ... Now to the meat of it. + */ + + for (src = str; *src; src++) { + c = *src; + + /* + * First, check that we're in a character class: each and every + * character can be copied as is. We only need to be careful if the + * character is a closing bracket: it will end the character class IF + * AND ONLY IF it is not preceded by a backslash. + */ + + if (in_character_class) { + in_character_class = ((c != ']') || (*(src - 1) == '\\')); + goto straight_copy; + } + + /* + * Are we in a quote? If yes, it is really simple: copy the current + * character, close the quote, the end. + */ + + if (in_quote) { + in_quote = FALSE; + goto straight_copy; + } + + /* + * The only thing left to handle now is the "normal" case: we are not in + * a character class nor in a quote. + */ + + if (c == '\\') { + /* + * Backslash: append it, and open a new quote. + */ + in_quote = TRUE; + goto straight_copy; + } else if (c == '[') { + /* + * Opening bracket: the beginning of a character class. + * + * Look ahead the next character: if it's an exclamation mark, then + * this is a complemented character class; append a caret to make + * the result string regex-friendly, and forward one character in + * advance. + */ + *dst++ = c; + in_character_class = TRUE; + if (*(src + 1) == '!') { + *dst++ = '^'; + src++; + } + } else if (IS_REGEX_META(c)) { + /* + * Regex metacharacter (except for ?
and *, see below): append a + * backslash, and then the character itself. + */ + *dst++ = '\\'; + goto straight_copy; + } else if (c == '?') + /* + * Question mark: take the substitution string out of our subst_table + * and append it to the string. + */ + dst = g_stpcpy(dst, table->question_mark); + else if (c == '*') { + /* + * Star: append the substitution string found in our subst_table. + * However, look forward the next character: if it's yet another + * star, then see if there is a substitution string for the double + * star and append this one instead. + * + * FIXME: this means that two consecutive stars in a glob string + * where there is no substitution for double_star can lead to + * exponential regex execution time: consider [^/]*[^/]*. + */ + const char *p = table->star; + if (double_star && *(src + 1) == '*') { + src++; + p = table->double_star; + } + dst = g_stpcpy(dst, p); + } else { + /* + * Any other character: append each time. + */ +straight_copy: + *dst++ = c; + } } - *r = '\0'; - return regex; + /* + * Done, now append the end, ONLY if we are not in a quote - a lone + * backslash at the end of a glob is illegal, just leave it as is, it will + * make the regex compile fail. + */ + + if (!in_quote) + dst = g_stpcpy(dst, end); + /* + * Finalize, return.
+ */ + + *dst = '\0'; + return result; } +/* + * File globs + */ -int -match_tar( - const char * glob, - const char * str) +char *glob_to_regex(const char *glob) +{ + return amglob_to_regex(glob, "^", "$", &glob_subst_stable); +} + +int match_glob(const char *glob, const char *str) { char *regex; - regex_t regc; + regex_t *re; int result; - char errmsg[STR_SIZE]; + regex_errbuf errmsg; - regex = tar_to_regex(glob); - if((result = regcomp(&regc, regex, - REG_EXTENDED|REG_NOSUB|REG_NEWLINE)) != 0) { - regerror(result, &regc, errmsg, SIZEOF(errmsg)); - error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg); - /*NOTREACHED*/ - } + regex = glob_to_regex(glob); + re = get_regex_from_cache(regex, &errmsg, TRUE); - if((result = regexec(&regc, str, 0, 0, 0)) != 0 - && result != REG_NOMATCH) { - regerror(result, &regc, errmsg, SIZEOF(errmsg)); - error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg); - /*NOTREACHED*/ - } + if (!re) + error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg); + /*NOTREACHED*/ + + result = try_match(re, str, &errmsg); + + if (result == MATCH_ERROR) + error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg); + /*NOTREACHED*/ + + g_free(regex); + + return result; +} + +char *validate_glob(const char *glob) +{ + char *regex, *ret = NULL; + regex_t regc; + static regex_errbuf errmsg; + + regex = glob_to_regex(glob); + + if (!do_regex_compile(regex, &regc, &errmsg, TRUE)) + ret = errmsg; regfree(&regc); - amfree(regex); + g_free(regex); + return ret; +} + +/* + * Tar globs + */ - return result == 0; +static char *tar_to_regex(const char *glob) +{ + return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable); } -char * -tar_to_regex( - const char * glob) +int match_tar(const char *glob, const char *str) { char *regex; - char *r; - size_t len; - int ch; - int last_ch; + regex_t *re; + int result; + regex_errbuf errmsg; + + regex = tar_to_regex(glob); + re = get_regex_from_cache(regex, &errmsg, TRUE); + + if (!re) + error("glob \"%s\"
-> regex \"%s\": %s", glob, regex, errmsg); + /*NOTREACHED*/ + + result = try_match(re, str, &errmsg); + + if (result == MATCH_ERROR) + error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg); + /*NOTREACHED*/ + + g_free(regex); + + return result; +} + +/* + * DISK/HOST MATCHING + * + * The functions below wrap input strings with separators and attempt to match + * the result. The core of the operation is the match_word() function. + */ + +/* + * Check whether a glob passed as an argument to match_word() only looks for the + * separator + */ + +static gboolean glob_is_separator_only(const char *glob, char sep) { + size_t len = strlen(glob); + const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 }, + len3[] = { '^', sep, '$', 0 }; + + switch (len) { + case 1: + return (*glob == sep); + case 2: + return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2)); + case 3: + return g_str_equal(glob, len3); + default: + return FALSE; + } +} + +/* + * Given a word and a separator as an argument, wrap the word with separators - + * if need be. For instance, if '/' is the separator, the rules are: + * + * - "" -> "/" + * - "/" -> "//" + * - "//" -> left alone + * - "xxx" -> "/xxx/" + * - "/xxx" -> "/xxx/" + * - "xxx/" -> "/xxx/" + * - "/xxx/" -> left alone + * + * (note that xxx here may contain the separator as well) + * + * Note that the returned string is dynamically allocated: it is up to the + * caller to free it. Note also that the first argument MUST NOT BE NULL. + */ + +static char *wrap_word(const char *word, const char separator, const char *glob) +{ + size_t len = strlen(word); + size_t len_glob = strlen(glob); + char *result, *p; /* - * Allocate an area to convert into. The worst case is a five to - * one expansion. + * We allocate for the worst case, which is two bytes more than the input + * (have to prepend and append a separator). 
*/ - len = strlen(glob); - regex = alloc(1 + len * 5 + 1 + 1); + result = g_malloc(len + 3); + p = result; /* - * Do the conversion: - * - * ? -> [^/] - * * -> .* - * [!...] -> [^...] - * - * The following are given a leading backslash to protect them - * unless they already have a backslash: - * - * ( ) { } + . ^ $ | - * - * Put a leading ^ and trailing $ around the result. If the last - * non-escaped character is \ leave the $ off to cause a syntax - * error when the regex is compiled. + * Zero-length: separator only */ - r = regex; - *r++ = '^'; - last_ch = '\0'; - for (ch = *glob++; ch != '\0'; last_ch = ch, ch = *glob++) { - if (last_ch == '\\') { - *r++ = (char)ch; - ch = '\0'; /* so last_ch != '\\' next time */ - } else if (last_ch == '[' && ch == '!') { - *r++ = '^'; - } else if (ch == '\\') { - *r++ = (char)ch; - } else if (ch == '*') { - *r++ = '.'; - *r++ = '*'; - } else if (ch == '?') { - *r++ = '['; - *r++ = '^'; - *r++ = '/'; - *r++ = ']'; - } else if (ch == '(' - || ch == ')' - || ch == '{' - || ch == '}' - || ch == '+' - || ch == '.' - || ch == '^' - || ch == '$' - || ch == '|') { - *r++ = '\\'; - *r++ = (char)ch; - } else { - *r++ = (char)ch; - } + if (len == 0) { + *p++ = separator; + goto out; } - if (last_ch != '\\') { - *r++ = '$'; + + /* + * Length is one: if the only character is the separator only, the result + * string is two separators + */ + + if (len == 1 && word[0] == separator) { + *p++ = separator; + *p++ = separator; + goto out; } - *r = '\0'; - return regex; -} + /* + * Otherwise: prepend the separator if needed, append the separator if + * needed. 
+ */ + if (word[0] != separator && glob[0] != '^') + *p++ = separator; -static int -match_word( - const char * glob, - const char * word, - const char separator) + p = g_stpcpy(p, word); + + if (word[len - 1] != separator && glob[len_glob-1] != '$') + *p++ = separator; + +out: + *p++ = '\0'; + return result; +} + +static int match_word(const char *glob, const char *word, const char separator) { - char *regex; - char *r; - size_t len; - int ch; - int last_ch; - int next_ch; - size_t lenword; - char *nword; - char *nglob; - char *g; - const char *w; - int i; - - lenword = strlen(word); - nword = (char *)alloc(lenword + 3); - - r = nword; - w = word; - if(lenword == 1 && *w == separator) { - *r++ = separator; - *r++ = separator; - } - else { - if(*w != separator) - *r++ = separator; - while(*w != '\0') - *r++ = *w++; - if(*(r-1) != separator) - *r++ = separator; - } - *r = '\0'; + char *wrapped_word = wrap_word(word, separator, glob); + struct mword_regexes *regexes = &mword_slash_regexes; + struct subst_table *table = &mword_slash_subst_table; + gboolean not_slash = (separator != '/'); + int ret; /* - * Allocate an area to convert into. The worst case is a six to - * one expansion. + * We only expect two separators: '/' or '.'. If it's not '/', it has to be + * the other one... */ - len = strlen(glob); - regex = (char *)alloc(1 + len * 6 + 1 + 1 + 2 + 2); - r = regex; - nglob = stralloc(glob); - g = nglob; - - if((len == 1 && nglob[0] == separator) || - (len == 2 && nglob[0] == '^' && nglob[1] == separator) || - (len == 2 && nglob[0] == separator && nglob[1] == '$') || - (len == 3 && nglob[0] == '^' && nglob[1] == separator && - nglob[2] == '$')) { - *r++ = '^'; - *r++ = '\\'; - *r++ = separator; - *r++ = '\\'; - *r++ = separator; - *r++ = '$'; + if (not_slash) { + regexes = &mword_dot_regexes; + table = &mword_dot_subst_table; } - else { - /* - * Do the conversion: - * - * ? -> [^\separator] - * * -> [^\separator]* - * [!...] -> [^...] 
- * ** -> .* - * - * The following are given a leading backslash to protect them - * unless they already have a backslash: - * - * ( ) { } + . ^ $ | - * - * If the last - * non-escaped character is \ leave it to cause a syntax - * error when the regex is compiled. - */ - - if(*g == '^') { - *r++ = '^'; - *r++ = '\\'; /* escape the separator */ - *r++ = separator; - g++; - if(*g == separator) g++; - } - else if(*g != separator) { - *r++ = '\\'; /* add a leading \separator */ - *r++ = separator; - } - last_ch = '\0'; - for (ch = *g++; ch != '\0'; last_ch = ch, ch = *g++) { - next_ch = *g; - if (last_ch == '\\') { - *r++ = (char)ch; - ch = '\0'; /* so last_ch != '\\' next time */ - } else if (last_ch == '[' && ch == '!') { - *r++ = '^'; - } else if (ch == '\\') { - *r++ = (char)ch; - } else if (ch == '*' || ch == '?') { - if(ch == '*' && next_ch == '*') { - *r++ = '.'; - g++; - } - else { - *r++ = '['; - *r++ = '^'; - *r++ = '\\'; - *r++ = separator; - *r++ = ']'; - } - if (ch == '*') { - *r++ = '*'; - } - } else if (ch == '$' && next_ch == '\0') { - if(last_ch != separator) { - *r++ = '\\'; - *r++ = separator; + + if(glob_is_separator_only(glob, separator)) { + ret = do_match(regexes->re_double_sep, wrapped_word, TRUE); + goto out; + } else { + /* + * Unlike what happens for tar and disk expressions, we need to + * calculate the beginning and end of our regex before calling + * amglob_to_regex(). + */ + + const char *begin, *end; + char *glob_copy = g_strdup(glob); + char *p, *g = glob_copy; + char *regex; + + /* + * Calculate the beginning of the regex: + * - by default, it is an unanchored separator; + * - if the glob begins with a caret, make that an anchored separator, + * and increment g appropriately; + * - if it begins with a separator, make it the empty string. 
+ */ + + p = glob_copy; + begin = regexes->re_separator; + + if (*p == '^') { + begin = "^"; + p++, g++; + if (*p == separator) { + begin = regexes->re_begin_full; + g++; + } + } else if (*p == separator) + begin = ""; + + /* + * Calculate the end of the regex: + * - an unanchored separator by default; + * - if the last character is a backslash or the separator itself, it + * should be the empty string; + * - if it is a dollar sign, overwrite it with 0 and look at the + * character before it: if it is the separator, only anchor at the + * end, otherwise, add a separator before the anchor. + */ + + p = &(glob_copy[strlen(glob_copy) - 1]); + end = regexes->re_separator; + if (*p == '\\' || *p == separator) { + end = ""; + } else if (*p == '$') { + char prev = *(p - 1); + *p = '\0'; + if (prev == separator) { + *(p-1) = '\0'; + if (p-2 >= glob_copy) { + prev = *(p - 2); + if (prev == '\\') { + *(p-2) = '\0'; + } } - *r++ = (char)ch; - } else if ( ch == '(' - || ch == ')' - || ch == '{' - || ch == '}' - || ch == '+' - || ch == '.' 
- || ch == '^' - || ch == '$' - || ch == '|') { - *r++ = '\\'; - *r++ = (char)ch; + end = regexes->re_end_full; } else { - *r++ = (char)ch; + end = "$"; } - } - if(last_ch != '\\') { - if(last_ch != separator && last_ch != '$') { - *r++ = '\\'; - *r++ = separator; /* add a trailing \separator */ - } - } - } - *r = '\0'; + } + + regex = amglob_to_regex(g, begin, end, table); + ret = do_match(regex, wrapped_word, TRUE); - i = match(regex,nword); + g_free(glob_copy); + g_free(regex); + } - amfree(nword); - amfree(nglob); - amfree(regex); - return i; +out: + g_free(wrapped_word); + return ret; } +/* + * Match a host expression + */ -int -match_host( - const char * glob, - const char * host) +int match_host(const char *glob, const char *host) { char *lglob, *lhost; - char *c; - const char *d; - int i; - - - lglob = (char *)alloc(strlen(glob)+1); - c = lglob, d=glob; - while( *d != '\0') - *c++ = (char)tolower(*d++); - *c = *d; - - lhost = (char *)alloc(strlen(host)+1); - c = lhost, d=host; - while( *d != '\0') - *c++ = (char)tolower(*d++); - *c = *d; - - i = match_word(lglob, lhost, (int)'.'); - amfree(lglob); - amfree(lhost); - return i; + int ret; + + if (*glob == '=') { + return strcmp(glob+1, host) == 0; + } + lglob = g_ascii_strdown(glob, -1); + lhost = g_ascii_strdown(host, -1); + + ret = match_word(lglob, lhost, '.'); + + g_free(lglob); + g_free(lhost); + return ret; } +/* + * Match a disk expression. Not as straightforward, since Windows paths must be + * accounted for. + */ -int -match_disk( - const char * glob, - const char * disk) +/* + * Convert a disk and glob from Windows expressed paths (backslashes) into Unix + * paths (slashes). + * + * Note: the resulting string is dynamically allocated, it is up to the caller + * to free it. + * + * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention. 
+ */ + +static char *convert_unc_to_unix(const char *unc) +{ + char *result = g_strdup(unc); + return g_strdelimit(result, "\\", '/'); +} + +static char *convert_winglob_to_unix(const char *glob) { - return match_word(glob, disk, '/'); + const char *src; + char *result, *dst; + result = g_malloc(strlen(glob) + 1); + dst = result; + + for (src = glob; *src; src++) { + if (*src == '\\' && *(src + 1) == '\\') { + *dst++ = '/'; + src++; + continue; + } + *dst++ = *src; + } + *dst = '\0'; + return result; } +/* + * Match a disk expression + */ + +int match_disk(const char *glob, const char *disk) +{ + char *glob2 = NULL, *disk2 = NULL; + const char *g = glob, *d = disk; + int result; + + /* + * Check whether our disk potentially refers to a Windows share (the first + * two characters are '\' and there is no / in the word at all): if yes, + * build Unix paths instead and pass those as arguments to match_word() + */ + + gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/')); + + if (*glob == '=') { + return strcmp(glob+1, disk) == 0; + } + + if (windows_share) { + glob2 = convert_winglob_to_unix(glob); + disk2 = convert_unc_to_unix(disk); + g = (const char *) glob2; + d = (const char *) disk2; + } + + result = match_word(g, d, '/'); + + /* + * We can g_free(NULL), so this is "safe" + */ + g_free(glob2); + g_free(disk2); + + return result; +} + +/* + * TIMESTAMPS/LEVEL MATCHING + */ + static int alldigits( const char *str) @@ -584,7 +993,11 @@ match_datestamp( if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) { goto illegal; } - + + if (*dateexp == '=') { + return strcmp(dateexp+1, datestamp) == 0; + } + /* strip and ignore an initial "^" */ if(dateexp[0] == '^') { strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1); @@ -595,6 +1008,10 @@ match_datestamp( mydateexp[sizeof(mydateexp)-1] = '\0'; } + if(strlen(dateexp) < 1) { + goto illegal; + } + if(mydateexp[strlen(mydateexp)-1] == '$') { match_exact = 1; mydateexp[strlen(mydateexp)-1] = '\0'; /* strip the 
trailing $ */ @@ -613,6 +1030,9 @@ match_datestamp( len = (size_t)(dash - mydateexp); /* length of XXXYYYY */ len_suffix = strlen(dash) - 1; /* length of ZZZZ */ if (len_suffix > len) goto illegal; + if (len < len_suffix) { + goto illegal; + } len_prefix = len - len_suffix; /* length of XXX */ dash++; @@ -633,14 +1053,14 @@ match_datestamp( if (!alldigits(mydateexp)) goto illegal; if(match_exact == 1) { - return (strcmp(datestamp, mydateexp) == 0); + return (g_str_equal(datestamp, mydateexp)); } else { - return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0); + return (g_str_has_prefix(datestamp, mydateexp)); } } illegal: - error(_("Illegal datestamp expression %s"),dateexp); + error("Illegal datestamp expression %s", dateexp); /*NOTREACHED*/ } @@ -651,57 +1071,67 @@ match_level( const char * level) { char *dash; - size_t len, len_suffix; - size_t len_prefix; - char lowend[100], highend[100]; + long int low, hi, level_i; char mylevelexp[100]; int match_exact; if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) { - error(_("Illegal level expression %s"),levelexp); + error("Illegal level expression %s", levelexp); /*NOTREACHED*/ } - + + if (*levelexp == '=') { + return strcmp(levelexp+1, level) == 0; + } + if(levelexp[0] == '^') { - strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1); + strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1); mylevelexp[strlen(levelexp)-1] = '\0'; + if (strlen(levelexp) == 0) { + error("Illegal level expression %s", levelexp); + /*NOTREACHED*/ + } } else { strncpy(mylevelexp, levelexp, strlen(levelexp)); mylevelexp[strlen(levelexp)] = '\0'; } - if(mylevelexp[strlen(mylevelexp)] == '$') { + if(mylevelexp[strlen(mylevelexp)-1] == '$') { match_exact = 1; - mylevelexp[strlen(mylevelexp)] = '\0'; + mylevelexp[strlen(mylevelexp)-1] = '\0'; } else match_exact = 0; if((dash = strchr(mylevelexp,'-'))) { if(match_exact == 1) { - error(_("Illegal level expression %s"),levelexp); - /*NOTREACHED*/ + goto illegal; } - len = (size_t)(dash - 
mylevelexp); - len_suffix = strlen(dash) - 1; - len_prefix = len - len_suffix; - dash++; - strncpy(lowend, mylevelexp, len); - lowend[len] = '\0'; - strncpy(highend, mylevelexp, len_prefix); - strncpy(&(highend[len_prefix]), dash, len_suffix); - highend[len] = '\0'; - return ((strncmp(level, lowend, strlen(lowend)) >= 0) && - (strncmp(level, highend , strlen(highend)) <= 0)); + *dash = '\0'; + if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal; + + errno = 0; + low = strtol(mylevelexp, (char **) NULL, 10); + if (errno) goto illegal; + hi = strtol(dash+1, (char **) NULL, 10); + if (errno) goto illegal; + level_i = strtol(level, (char **) NULL, 10); + if (errno) goto illegal; + + return ((level_i >= low) && (level_i <= hi)); } else { + if (!alldigits(mylevelexp)) goto illegal; if(match_exact == 1) { - return (strcmp(level, mylevelexp) == 0); + return (g_str_equal(level, mylevelexp)); } else { - return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0); + return (g_str_has_prefix(level, mylevelexp)); } } +illegal: + error("Illegal level expression %s", levelexp); + /*NOTREACHED*/ }