2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
4 * Copyright (c) 2007-2012 Zmanda, Inc. All Rights Reserved.
7 * Permission to use, copy, modify, distribute, and sell this software and its
8 * documentation for any purpose is hereby granted without fee, provided that
9 * the above copyright notice appear in all copies and that both that
10 * copyright notice and this permission notice appear in supporting
11 * documentation, and that the name of U.M. not be used in advertising or
12 * publicity pertaining to distribution of the software without specific,
13 * written prior permission. U.M. makes no representations about the
14 * suitability of this software for any purpose. It is provided "as is"
15 * without express or implied warranty.
17 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
19 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
21 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
22 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 * Authors: the Amanda Development Team. Its members are listed in a
25 * file named AUTHORS, in the root directory of this distribution.
29 * See match.h for function prototypes and further explanations.
37 * DATA STRUCTURES, MACROS, STATIC DATA
41 * Return codes used by try_match()
45 #define MATCH_NONE (0)
46 #define MATCH_ERROR (-1)
49 * Macro to tell whether a character is a regex metacharacter. Note that '*'
50 * and '?' are NOT included: they are themselves special in globs.
53 #define IS_REGEX_META(c) ( \
54 (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
55 (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
59 * Define a specific type to hold error messages in case regex compile/matching
63 typedef char regex_errbuf[STR_SIZE];
66 * Structure used by amglob_to_regex() to expand particular glob characters. Its
68 * - question_mark: what the question mark ('?') should be replaced with;
69 * - star: what the star ('*') should be replaced with;
70 * - double_star: what two consecutive stars should be replaced with.
72 * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL.
76 const char *question_mark;
78 const char *double_star;
82 * Substitution data for glob_to_regex()
85 static struct subst_table glob_subst_stable = {
86 "[^/]", /* question_mark */
88 NULL /* double_star */
92 * Substitution data for tar_to_regex()
95 static struct subst_table tar_subst_stable = {
96 "[^/]", /* question_mark */
98 NULL /* double_star */
102 * Substitution data for match_word(): dot
105 static struct subst_table mword_dot_subst_table = {
106 "[^.]", /* question_mark */
108 ".*" /* double_star */
112 * Substitution data for match_word(): slash
115 static struct subst_table mword_slash_subst_table = {
116 "[^/]", /* question_mark */
118 ".*" /* double_star */
122 * match_word() specific data:
123 * - re_double_sep: anchored regex matching two separators;
124 * - re_separator: regex matching the separator;
125 * - re_begin_full: regex matching the separator, anchored at the beginning;
126 * - re_end_full: regex matching the separator, anchored at the end.
129 struct mword_regexes {
130 const char *re_double_sep;
131 const char *re_begin_full;
132 const char *re_separator;
133 const char *re_end_full;
136 static struct mword_regexes mword_dot_regexes = {
137 "^\\.\\.$", /* re_double_sep */
138 "^\\.", /* re_begin_full */
139 "\\.", /* re_separator */
140 "\\.$" /* re_end_full */
143 static struct mword_regexes mword_slash_regexes = {
144 "^\\/\\/$", /* re_double_sep */
145 "^\\/", /* re_begin_full */
146 "\\/", /* re_separator */
147 "\\/$" /* re_end_full */
151 * Regular expression caches, and a static mutex to protect initialization and
152 * access. This may be unnecessarily coarse, but it is unknown at this time
153 * whether GHashTable accesses are thread-safe, and get_regex_from_cache() may
154 * be called from within threads, so play it safe.
157 #if (GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 31))
158 # pragma GCC diagnostic push
159 # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
160 static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
161 # pragma GCC diagnostic pop
163 static GStaticMutex re_cache_mutex = G_STATIC_MUTEX_INIT;
165 static GHashTable *regex_cache = NULL, *regex_cache_newline = NULL;
172 * Initialize regex caches. NOTE: this function MUST be called with
173 * re_cache_mutex LOCKED, see get_regex_from_cache()
176 static void init_regex_caches(void)
178 static gboolean initialized = FALSE;
183 regex_cache = g_hash_table_new(g_str_hash, g_str_equal);
184 regex_cache_newline = g_hash_table_new(g_str_hash, g_str_equal);
190 * Clean up a regular expression by escaping all non-alphanumeric characters, and
191 * append beginning/end anchors if need be
194 char *clean_regex(const char *str, gboolean anchor)
199 result = g_malloc(2 * strlen(str) + 3);
205 for (src = str; *src; src++) {
206 if (!g_ascii_isalnum((int) *src))
219 * Compile one regular expression. Return TRUE if the regex has been compiled
220 * successfully. Otherwise, return FALSE and copy the error message into the
221 * supplied regex_errbuf pointer. Also, we want to know whether flags should
222 * include REG_NEWLINE (See regcomp(3) for details). Since this is the more
223 * frequent case, add REG_NEWLINE to the default flags, and remove it only if
224 * match_newline is set to FALSE.
227 static gboolean do_regex_compile(const char *str, regex_t *regex,
228 regex_errbuf *errbuf, gboolean match_newline)
230 int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
234 flags &= ~REG_NEWLINE;
236 result = regcomp(regex, str, flags);
241 regerror(result, regex, *errbuf, sizeof(*errbuf));
246 * Get an already compiled buffer from the regex cache. If the regex is not in
247 * the cache, allocate a new one and compile it using do_regex_compile(). If the
248 * compile fails, call regfree() on the object and return NULL to the caller. If
249 * it does succeed, put the regex buffer in cache and return a pointer to it.
252 static regex_t *get_regex_from_cache(const char *re_str, regex_errbuf *errbuf,
253 gboolean match_newline)
258 g_static_mutex_lock(&re_cache_mutex);
262 cache = (match_newline) ? regex_cache_newline: regex_cache;
263 ret = g_hash_table_lookup(cache, re_str);
268 ret = g_new(regex_t, 1);
270 if (do_regex_compile(re_str, ret, errbuf, match_newline)) {
271 g_hash_table_insert(cache, g_strdup(re_str), ret);
280 g_static_mutex_unlock(&re_cache_mutex);
285 * Validate one regular expression using do_regex_compile(), and return NULL if
286 * the regex is valid, or the error message otherwise.
289 char *validate_regexp(const char *regex)
292 static regex_errbuf errmsg;
295 valid = do_regex_compile(regex, ®c, &errmsg, TRUE);
298 return (valid) ? NULL : errmsg;
302 * See if a string matches a compiled regular expression. Return one of MATCH_*
303 * defined above. If, for some reason, regexec() returns something other than
304 * 0 or REG_NOMATCH, return MATCH_ERROR and print the error message in the
305 * supplied regex_errbuf.
308 static int try_match(regex_t *regex, const char *str,
309 regex_errbuf *errbuf)
311 int result = regexec(regex, str, 0, 0, 0);
318 /* Fall through: something went really wrong */
321 regerror(result, regex, *errbuf, sizeof(*errbuf));
326 * Try and match a string against a regular expression, using
327 * do_regex_compile() and try_match(). Exit early if the regex didn't compile
328 * or there was an error during matching.
331 int do_match(const char *regex, const char *str, gboolean match_newline)
337 re = get_regex_from_cache(regex, &errmsg, match_newline);
340 error("regex \"%s\": %s", regex, errmsg);
343 result = try_match(re, str, &errmsg);
345 if (result == MATCH_ERROR)
346 error("regex \"%s\": %s", regex, errmsg);
353 * DISK/HOST EXPRESSION HANDLING
357 * Check whether a given character should be escaped (that is, prepended with a
358 * backslash), EXCEPT for one character.
361 static gboolean should_be_escaped_except(char c, char not_this_one)
363 if (c == not_this_one)
383 * Take a disk/host expression and turn it into a full-blown amglob (with
384 * start and end anchors) following rules in amanda-match(7). The not_this_one
385 * argument represents a character which is NOT meant to be special in this
386 * case: '/' for disks and '.' for hosts.
389 static char *full_amglob_from_expression(const char *str, char not_this_one)
394 result = g_malloc(2 * strlen(str) + 3);
399 for (src = str; *src; src++) {
400 if (should_be_escaped_except(*src, not_this_one))
411 * Turn a disk/host expression into a regex
414 char *make_exact_disk_expression(const char *disk)
416 return full_amglob_from_expression(disk, '/');
419 char *make_exact_host_expression(const char *host)
421 return full_amglob_from_expression(host, '.');
425 * GLOB HANDLING, as per amanda-match(7)
429 * Turn a glob into a regex.
432 static char *amglob_to_regex(const char *str, const char *begin,
433 const char *end, struct subst_table *table)
439 gboolean double_star = (table->double_star != NULL);
442 * There are two particular cases when building a regex out of a glob:
443 * character classes (anything inside [...] or [!...]) and quotes (anything
444 * preceded by a backslash). We start with none being true.
447 gboolean in_character_class = FALSE, in_quote = FALSE;
450 * Allocate enough space for our string. At worst, the allocated space is
451 * the length of the following:
452 * - beginning of regex;
453 * - size of original string multiplied by worst-case expansion;
457 * Calculate the worst case expansion by walking our struct subst_table.
460 worst_case = strlen(table->question_mark);
462 if (worst_case < strlen(table->star))
463 worst_case = strlen(table->star);
465 if (double_star && worst_case < strlen(table->double_star))
466 worst_case = strlen(table->double_star);
468 result = g_malloc(strlen(begin) + strlen(str) * worst_case + strlen(end) + 1);
471 * Start by copying the beginning of the regex...
474 dst = g_stpcpy(result, begin);
477 * ... Now to the meat of it.
480 for (src = str; *src; src++) {
484 * First, check that we're in a character class: each and every
485 * character can be copied as is. We only need to be careful is the
485 * character can be copied as is. We only need to be careful if the
487 * AND ONLY IF it is not preceded by a backslash.
490 if (in_character_class) {
491 in_character_class = ((c != ']') || (*(src - 1) == '\\'));
496 * Are we in a quote? If yes, it is really simple: copy the current
497 * character, close the quote, the end.
506 * The only thing left to handle now is the "normal" case: we are not in
507 * a character class nor in a quote.
512 * Backslash: append it, and open a new quote.
516 } else if (c == '[') {
518 * Opening bracket: the beginning of a character class.
520 * Look ahead at the next character: if it's an exclamation mark, then
521 * this is a complemented character class; append a caret to make
522 * the result string regex-friendly, and forward one character in
526 in_character_class = TRUE;
527 if (*(src + 1) == '!') {
531 } else if (IS_REGEX_META(c)) {
533 * Regex metacharacter (except for ? and *, see below): append a
534 * backslash, and then the character itself.
540 * Question mark: take the substitution string out of our subst_table
541 * and append it to the string.
543 dst = g_stpcpy(dst, table->question_mark);
546 * Star: append the substitution string found in our subst_table.
547 * However, look ahead at the next character: if it's yet another
548 * star, then see if there is a substitution string for the double
549 * star and append this one instead.
551 * FIXME: this means that two consecutive stars in a glob string
552 * where there is no substitution for double_star can lead to
553 * exponential regex execution time: consider [^/]*[^/]*.
555 const char *p = table->star;
556 if (double_star && *(src + 1) == '*') {
558 p = table->double_star;
560 dst = g_stpcpy(dst, p);
563 * Any other character: append each time.
571 * Done, now append the end, ONLY if we are not in a quote - a lone
572 * backslash at the end of a glob is illegal, just leave it as is, it will
573 * make the regex compile fail.
577 dst = g_stpcpy(dst, end);
590 char *glob_to_regex(const char *glob)
592 return amglob_to_regex(glob, "^", "$", &glob_subst_stable);
595 int match_glob(const char *glob, const char *str)
602 regex = glob_to_regex(glob);
603 re = get_regex_from_cache(regex, &errmsg, TRUE);
606 error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
609 result = try_match(re, str, &errmsg);
611 if (result == MATCH_ERROR)
612 error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
620 char *validate_glob(const char *glob)
622 char *regex, *ret = NULL;
624 static regex_errbuf errmsg;
626 regex = glob_to_regex(glob);
628 if (!do_regex_compile(regex, ®c, &errmsg, TRUE))
640 static char *tar_to_regex(const char *glob)
642 return amglob_to_regex(glob, "(^|/)", "($|/)", &tar_subst_stable);
645 int match_tar(const char *glob, const char *str)
652 regex = tar_to_regex(glob);
653 re = get_regex_from_cache(regex, &errmsg, TRUE);
656 error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
659 result = try_match(re, str, &errmsg);
661 if (result == MATCH_ERROR)
662 error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg);
673 * The functions below wrap input strings with separators and attempt to match
674 * the result. The core of the operation is the match_word() function.
678 * Check whether a glob passed as an argument to match_word() only looks for the
682 static gboolean glob_is_separator_only(const char *glob, char sep) {
683 size_t len = strlen(glob);
684 const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
685 len3[] = { '^', sep, '$', 0 };
689 return (*glob == sep);
691 return !(!g_str_equal(glob, len2_1) && !g_str_equal(glob, len2_2));
693 return g_str_equal(glob, len3);
700 * Given a word and a separator as an argument, wrap the word with separators -
701 * if need be. For instance, if '/' is the separator, the rules are:
705 * - "//" -> left alone
707 * - "/xxx" -> "/xxx/"
708 * - "xxx/" -> "/xxx/"
709 * - "/xxx/" -> left alone
711 * (note that xxx here may contain the separator as well)
713 * Note that the returned string is dynamically allocated: it is up to the
714 * caller to free it. Note also that the first argument MUST NOT BE NULL.
717 static char *wrap_word(const char *word, const char separator, const char *glob)
719 size_t len = strlen(word);
720 size_t len_glob = strlen(glob);
724 * We allocate for the worst case, which is two bytes more than the input
725 * (have to prepend and append a separator).
727 result = g_malloc(len + 3);
731 * Zero-length: separator only
740 * Length is one: if the only character is the separator, the result
741 * string is two separators
744 if (len == 1 && word[0] == separator) {
751 * Otherwise: prepend the separator if needed, append the separator if
755 if (word[0] != separator && glob[0] != '^')
758 p = g_stpcpy(p, word);
760 if (word[len - 1] != separator && glob[len_glob-1] != '$')
768 static int match_word(const char *glob, const char *word, const char separator)
770 char *wrapped_word = wrap_word(word, separator, glob);
771 struct mword_regexes *regexes = &mword_slash_regexes;
772 struct subst_table *table = &mword_slash_subst_table;
773 gboolean not_slash = (separator != '/');
777 * We only expect two separators: '/' or '.'. If it's not '/', it has to be
781 regexes = &mword_dot_regexes;
782 table = &mword_dot_subst_table;
785 if(glob_is_separator_only(glob, separator)) {
786 ret = do_match(regexes->re_double_sep, wrapped_word, TRUE);
790 * Unlike what happens for tar and disk expressions, we need to
791 * calculate the beginning and end of our regex before calling
795 const char *begin, *end;
796 char *glob_copy = g_strdup(glob);
797 char *p, *g = glob_copy;
801 * Calculate the beginning of the regex:
802 * - by default, it is an unanchored separator;
803 * - if the glob begins with a caret, make that an anchored separator,
804 * and increment g appropriately;
805 * - if it begins with a separator, make it the empty string.
809 begin = regexes->re_separator;
814 if (*p == separator) {
815 begin = regexes->re_begin_full;
818 } else if (*p == separator)
822 * Calculate the end of the regex:
823 * - an unanchored separator by default;
824 * - if the last character is a backslash or the separator itself, it
825 * should be the empty string;
826 * - if it is a dollar sign, overwrite it with 0 and look at the
827 * character before it: if it is the separator, only anchor at the
828 * end, otherwise, add a separator before the anchor.
831 p = &(glob_copy[strlen(glob_copy) - 1]);
832 end = regexes->re_separator;
833 if (*p == '\\' || *p == separator) {
835 } else if (*p == '$') {
836 char prev = *(p - 1);
838 if (prev == separator) {
840 if (p-2 >= glob_copy) {
846 end = regexes->re_end_full;
852 regex = amglob_to_regex(g, begin, end, table);
853 ret = do_match(regex, wrapped_word, TRUE);
860 g_free(wrapped_word);
865 * Match a host expression
868 int match_host(const char *glob, const char *host)
874 return strcmp(glob+1, host) == 0;
876 lglob = g_ascii_strdown(glob, -1);
877 lhost = g_ascii_strdown(host, -1);
879 ret = match_word(lglob, lhost, '.');
887 * Match a disk expression. Not as straightforward, since Windows paths must be
892 * Convert a disk and glob from Windows expressed paths (backslashes) into Unix
895 * Note: the resulting string is dynamically allocated, it is up to the caller
898 * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
901 static char *convert_unc_to_unix(const char *unc)
903 char *result = g_strdup(unc);
904 return g_strdelimit(result, "\\", '/');
907 static char *convert_winglob_to_unix(const char *glob)
911 result = g_malloc(strlen(glob) + 1);
914 for (src = glob; *src; src++) {
915 if (*src == '\\' && *(src + 1) == '\\') {
927 * Match a disk expression
930 int match_disk(const char *glob, const char *disk)
932 char *glob2 = NULL, *disk2 = NULL;
933 const char *g = glob, *d = disk;
937 * Check whether our disk potentially refers to a Windows share (the first
938 * two characters are '\' and there is no / in the word at all): if yes,
939 * build Unix paths instead and pass those as arguments to match_word()
942 gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/'));
945 return strcmp(glob+1, disk) == 0;
949 glob2 = convert_winglob_to_unix(glob);
950 disk2 = convert_unc_to_unix(disk);
951 g = (const char *) glob2;
952 d = (const char *) disk2;
955 result = match_word(g, d, '/');
958 * We can g_free(NULL), so this is "safe"
967 * TIMESTAMPS/LEVEL MATCHING
975 if (!isdigit((int)*(str++)))
983 const char * dateexp,
984 const char * datestamp)
987 size_t len, len_suffix;
989 char firstdate[100], lastdate[100];
993 if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
997 if (*dateexp == '=') {
998 return strcmp(dateexp+1, datestamp) == 0;
1001 /* strip and ignore an initial "^" */
1002 if(dateexp[0] == '^') {
1003 strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
1004 mydateexp[sizeof(mydateexp)-1] = '\0';
1007 strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
1008 mydateexp[sizeof(mydateexp)-1] = '\0';
1011 if(strlen(dateexp) < 1) {
1015 if(mydateexp[strlen(mydateexp)-1] == '$') {
1017 mydateexp[strlen(mydateexp)-1] = '\0'; /* strip the trailing $ */
1022 /* a single dash represents a date range */
1023 if((dash = strchr(mydateexp,'-'))) {
1024 if(match_exact == 1 || strchr(dash+1, '-')) {
1028 /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
1030 len = (size_t)(dash - mydateexp); /* length of XXXYYYY */
1031 len_suffix = strlen(dash) - 1; /* length of ZZZZ */
1032 if (len_suffix > len) goto illegal;
1033 if (len < len_suffix) {
1036 len_prefix = len - len_suffix; /* length of XXX */
1040 strncpy(firstdate, mydateexp, len);
1041 firstdate[len] = '\0';
1042 strncpy(lastdate, mydateexp, len_prefix);
1043 strncpy(&(lastdate[len_prefix]), dash, len_suffix);
1044 lastdate[len] = '\0';
1045 if (!alldigits(firstdate) || !alldigits(lastdate))
1047 if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
1049 return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
1050 (strncmp(datestamp, lastdate , strlen(lastdate)) <= 0));
1053 if (!alldigits(mydateexp))
1055 if(match_exact == 1) {
1056 return (g_str_equal(datestamp, mydateexp));
1059 return (g_str_has_prefix(datestamp, mydateexp));
1063 error("Illegal datestamp expression %s", dateexp);
1070 const char * levelexp,
1074 long int low, hi, level_i;
1075 char mylevelexp[100];
1078 if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
1079 error("Illegal level expression %s", levelexp);
1083 if (*levelexp == '=') {
1084 return strcmp(levelexp+1, level) == 0;
1087 if(levelexp[0] == '^') {
1088 strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
1089 mylevelexp[strlen(levelexp)-1] = '\0';
1090 if (strlen(levelexp) == 0) {
1091 error("Illegal level expression %s", levelexp);
1096 strncpy(mylevelexp, levelexp, strlen(levelexp));
1097 mylevelexp[strlen(levelexp)] = '\0';
1100 if(mylevelexp[strlen(mylevelexp)-1] == '$') {
1102 mylevelexp[strlen(mylevelexp)-1] = '\0';
1107 if((dash = strchr(mylevelexp,'-'))) {
1108 if(match_exact == 1) {
1113 if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;
1116 low = strtol(mylevelexp, (char **) NULL, 10);
1117 if (errno) goto illegal;
1118 hi = strtol(dash+1, (char **) NULL, 10);
1119 if (errno) goto illegal;
1120 level_i = strtol(level, (char **) NULL, 10);
1121 if (errno) goto illegal;
1123 return ((level_i >= low) && (level_i <= hi));
1126 if (!alldigits(mylevelexp)) goto illegal;
1127 if(match_exact == 1) {
1128 return (g_str_equal(level, mylevelexp));
1131 return (g_str_has_prefix(level, mylevelexp));
1135 error("Illegal level expression %s", levelexp);