2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: match.c,v 1.23 2006/05/25 01:47:12 johnfranks Exp $
29 * functions for checking and matching regular expressions
36 static int match_word(const char *glob, const char *word, const char separator);
37 static char *tar_to_regex(const char *glob);
40 * REGEX MATCHING FUNCTIONS
44 * Define a specific type to hold error messages in case regex compile/matching
48 typedef char regex_errbuf[STR_SIZE];
51 * Validate one regular expression. If the regex is invalid, copy the error
52 * message into the supplied regex_errbuf pointer. Also, we want to know whether
53 * flags should include REG_NEWLINE (See regcomp(3) for details). Since this is
54 * the more frequent case, add REG_NEWLINE to the default flags, and remove it
55 * only if match_newline is set to FALSE.
58 static gboolean do_validate_regex(const char *str, regex_t *regex,
59 regex_errbuf *errbuf, gboolean match_newline)
61 int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
65 CLR(flags, REG_NEWLINE);
67 result = regcomp(regex, str, flags);
72 regerror(result, regex, *errbuf, SIZEOF(*errbuf));
77 * See if a string matches a regular expression. Return one of MATCH_* defined
78 * below. If, for some reason, regexec() returns something other than 0 or
79 * REG_NOMATCH, return MATCH_ERROR and print the error message in the supplied
84 #define MATCH_NONE (0)
85 #define MATCH_ERROR (-1)
87 static int try_match(regex_t *regex, const char *str,
90 int result = regexec(regex, str, 0, 0, 0);
97 /* Fall through: something went really wrong */
100 regerror(result, regex, *errbuf, SIZEOF(*errbuf));
109 static regex_errbuf errmsg;
112 valid = do_validate_regex(regex, ®c, &errmsg, TRUE);
115 return (valid) ? NULL : errmsg;
126 result = alloc(2*strlen(str)+3);
131 for(i=0;i<strlen(str);i++) {
132 if(!isalnum((int)str[i]))
143 * Check whether a given character should be escaped (that is, prepended with a
144 * backslash), EXCEPT for one character.
147 static gboolean should_be_escaped_except(char c, char not_this_one)
149 if (c == not_this_one)
169 * Take a disk/host expression and turn it into a full-blown amglob (with
170 * start and end anchors) following rules in amanda-match(7). The not_this_one
171 * argument represents a character which is NOT meant to be special in this
172 * case: '/' for disks and '.' for hosts.
175 static char *full_amglob_from_expression(const char *str, char not_this_one)
180 result = alloc(2 * strlen(str) + 3);
185 for (src = str; *src; src++) {
186 if (should_be_escaped_except(*src, not_this_one))
197 make_exact_host_expression(
200 return full_amglob_from_expression(host, '.');
204 make_exact_disk_expression(
207 return full_amglob_from_expression(disk, '/');
210 int do_match(const char *regex, const char *str, gboolean match_newline)
217 ok = do_validate_regex(regex, ®c, &errmsg, match_newline);
220 error(_("regex \"%s\": %s"), regex, errmsg);
223 result = try_match(®c, str, &errmsg);
225 if (result == MATCH_ERROR)
226 error(_("regex \"%s\": %s"), regex, errmsg);
238 char *regex, *ret = NULL;
240 static regex_errbuf errmsg;
242 regex = glob_to_regex(glob);
244 if (!do_validate_regex(regex, ®c, &errmsg, TRUE))
263 regex = glob_to_regex(glob);
264 ok = do_validate_regex(regex, ®c, &errmsg, TRUE);
267 error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
270 result = try_match(®c, str, &errmsg);
272 if (result == MATCH_ERROR)
273 error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
283 * Macro to tell whether a character is a regex metacharacter. Note that '*'
284 * and '?' are NOT included: they are themselves special in globs.
287 #define IS_REGEX_META(c) ( \
288 (c) == '.' || (c) == '(' || (c) == ')' || (c) == '{' || (c) == '}' || \
289 (c) == '+' || (c) == '^' || (c) == '$' || (c) == '|' \
293 * EXPANDING A MATCH TO A REGEX (as per amanda-match(7))
295 * The function at the core of this operation is amglob_to_regex(). It
296 * takes three arguments: the string to convert, a substitution table and a
297 * worst-case expansion.
299 * The substitution table, defined right below, is used to replace particular
300 * string positions and/or characters. Its fields are:
301 * - begin: what the beginning of the string should be replaced with;
302 * - end: what the end of the string should be replaced with;
303 * - question_mark: what the question mark ('?') should be replaced with;
304 * - star: what the star ('*') should be replaced with;
305 * - double_star: what two consecutive stars should be replaced with.
307 * Note that apart from double_star, ALL OTHER FIELDS MUST NOT BE NULL
313 const char *question_mark;
315 const char *double_star;
318 static char *amglob_to_regex(const char *str, struct subst_table *table,
326 * There are two particular cases when building a regex out of a glob:
327 * character classes (anything inside [...] or [!...]) and quotes (anything
328 * preceded by a backslash). We start with none being true.
331 gboolean in_character_class = FALSE, in_quote = FALSE;
334 * Allocate enough space for our string. At worst, the allocated space is
335 * the length of the following:
336 * - beginning of regex;
337 * - size of original string multiplied by worst-case expansion;
342 result = alloc(strlen(table->begin) + strlen(str) * worst_case
343 + strlen(table->end) + 1);
346 * Start by copying the beginning of the regex...
349 dst = g_stpcpy(result, table->begin);
352 * ... Now to the meat of it.
355 for (src = str; *src; src++) {
359 * First, check that we're in a character class: each and every
360 * character can be copied as is. We only need to be careful if the
361 * character is a closing bracket: it will end the character class IF
362 * AND ONLY IF it is not preceded by a backslash.
365 if (in_character_class) {
366 in_character_class = ((c != ']') || (*(src - 1) == '\\'));
371 * Are we in a quote? If yes, it is really simple: copy the current
372 * character, close the quote, the end.
381 * The only thing left to handle now is the "normal" case: we are not in
382 * a character class nor in a quote.
387 * Backslash: append it, and open a new quote.
391 } else if (c == '[') {
393 * Opening bracket: the beginning of a character class.
395 * Look ahead at the next character: if it's an exclamation mark, then
396 * this is a complemented character class; append a caret to make
397 * the result string regex-friendly, and forward one character in
401 in_character_class = TRUE;
402 if (*(src + 1) == '!') {
406 } else if (IS_REGEX_META(c)) {
408 * Regex metacharacter (except for ? and *, see below): append a
409 * backslash, and then the character itself.
415 * Question mark: take the substitution string out of our subst_table
416 * and append it to the string.
418 dst = g_stpcpy(dst, table->question_mark);
421 * Star: append the substitution string found in our subst_table.
422 * However, look ahead at the next character: if it's yet another
423 * star, then see if there is a substitution string for the double
424 * star and append this one instead.
426 * FIXME: this means that two consecutive stars in a glob string
427 * where there is no substitution for double_star can lead to
428 * exponential regex execution time: consider [^/]*[^/]*.
430 const char *p = table->star;
431 if (*(src + 1) == '*' && table->double_star) {
433 p = table->double_star;
435 dst = g_stpcpy(dst, p);
438 * Any other character: append each time.
446 * Done, now append the end, ONLY if we are not in a quote - a lone
447 * backslash at the end of a glob is illegal; just leave it as is, it will
448 * make the regex compile fail.
452 dst = g_stpcpy(dst, table->end);
461 static struct subst_table glob_subst_stable = {
464 "[^/]", /* question_mark */
466 NULL /* double_star */
469 static size_t glob_worst_case = 5; /* star */
475 return amglob_to_regex(glob, &glob_subst_stable, glob_worst_case);
489 regex = tar_to_regex(glob);
490 ok = do_validate_regex(regex, ®c, &errmsg, TRUE);
493 error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
496 result = try_match(®c, str, &errmsg);
498 if (result == MATCH_ERROR)
499 error(_("glob \"%s\" -> regex \"%s\": %s"), glob, regex, errmsg);
508 static struct subst_table tar_subst_stable = {
511 "[^/]", /* question_mark */
513 NULL /* double_star */
516 static size_t tar_worst_case = 5; /* begin or end */
522 return amglob_to_regex(glob, &tar_subst_stable, tar_worst_case);
526 * Two utility functions used by match_disk() below: they are used to convert a
527 * disk and glob from Windows expressed paths (backslashes) into Unix paths
530 * Note: the resulting string is dynamically allocated, it is up to the caller
533 * Note 2: UNC in convert_unc_to_unix stands for Uniform Naming Convention.
536 static char *convert_unc_to_unix(const char *unc)
540 result = alloc(strlen(unc) + 1);
543 for (src = unc; *src; src++)
544 *(dst++) = (*src == '\\') ? '/' : *src;
550 static char *convert_winglob_to_unix(const char *glob)
554 result = alloc(strlen(glob) + 1);
557 for (src = glob; *src; src++) {
558 if (*src == '\\' && *(src + 1) == '\\') {
570 * Check whether a glob passed as an argument to match_word() only looks for the
574 static gboolean glob_is_separator_only(const char *glob, char sep) {
575 size_t len = strlen(glob);
576 const char len2_1[] = { '^', sep , 0 }, len2_2[] = { sep, '$', 0 },
577 len3[] = { '^', sep, '$', 0 };
581 return (*glob == sep);
583 return !(strcmp(glob, len2_1) && strcmp(glob, len2_2));
585 return !strcmp(glob, len3);
595 const char separator)
606 lenword = strlen(word);
607 nword = (char *)alloc(lenword + 3);
611 if(lenword == 1 && *src == separator) {
616 if(*src != separator)
620 if(*(dst-1) != separator)
626 nglob = stralloc(glob);
628 if(glob_is_separator_only(nglob, separator)) {
629 regex = alloc(7); /* Length of what is written below plus '\0' */
640 * Unlike what happens for tar and disk expressions, here the
641 * substitution table needs to be dynamically allocated. When we enter
642 * here, we know what the expansions will be for the question mark, the
643 * star and the double star, and also the worst case expansion. We
644 * calculate the begin and end expansions below.
647 #define MATCHWORD_STAR_EXPANSION(c) (const char []) { \
648 '[', '^', (c), ']', '*', 0 \
650 #define MATCHWORD_QUESTIONMARK_EXPANSION(c) (const char []) { \
651 '[', '^', (c), ']', 0 \
653 #define MATCHWORD_DOUBLESTAR_EXPANSION ".*"
655 struct subst_table table;
656 size_t worst_case = 5;
657 const char *begin, *end;
661 * Calculate the beginning of the regex:
662 * - by default, it is an unanchored separator;
663 * - if the glob begins with a caret, make that an anchored separator,
664 * and increment g appropriately;
665 * - if it begins with a separator, make it the empty string.
670 #define REGEX_BEGIN_FULL(c) (const char[]) { '^', '\\', (c), 0 }
671 #define REGEX_BEGIN_NOANCHOR(c) (const char[]) { '\\', (c), 0 }
672 #define REGEX_BEGIN_ANCHORONLY "^" /* Unused, but defined for consistency */
673 #define REGEX_BEGIN_EMPTY ""
675 begin = REGEX_BEGIN_NOANCHOR(separator);
678 begin = REGEX_BEGIN_FULL(separator);
682 } else if (*p == separator)
683 begin = REGEX_BEGIN_EMPTY;
686 * Calculate the end of the regex:
687 * - an unanchored separator by default;
688 * - if the last character is a backslash or the separator itself, it
689 * should be the empty string;
690 * - if it is a dollar sign, overwrite it with 0 and look at the
691 * character before it: if it is the separator, only anchor at the
692 * end, otherwise, add a separator before the anchor.
695 p = &(nglob[strlen(nglob) - 1]);
697 #define REGEX_END_FULL(c) (const char[]) { '\\', (c), '$', 0 }
698 #define REGEX_END_NOANCHOR(c) REGEX_BEGIN_NOANCHOR(c)
699 #define REGEX_END_ANCHORONLY "$"
700 #define REGEX_END_EMPTY REGEX_BEGIN_EMPTY
702 end = REGEX_END_NOANCHOR(separator);
704 if (*p == '\\' || *p == separator)
705 end = REGEX_END_EMPTY;
706 else if (*p == '$') {
707 char prev = *(p - 1);
709 if (prev == separator)
710 end = REGEX_END_ANCHORONLY;
712 end = REGEX_END_FULL(separator);
716 * Fill in our substitution table and generate the regex
721 table.question_mark = MATCHWORD_QUESTIONMARK_EXPANSION(separator);
722 table.star = MATCHWORD_STAR_EXPANSION(separator);
723 table.double_star = MATCHWORD_DOUBLESTAR_EXPANSION;
725 regex = amglob_to_regex(g, &table, worst_case);
728 ret = do_match(regex, nword, TRUE);
747 lglob = g_ascii_strdown(glob, -1);
748 lhost = g_ascii_strdown(host, -1);
750 ret = match_word(lglob, lhost, '.');
763 char *glob2 = NULL, *disk2 = NULL;
764 const char *g = glob, *d = disk;
768 * Check whether our disk potentially refers to a Windows share (the first
769 * two characters are '\' and there is no / in the word at all): if yes,
770 * convert all double backslashes to slashes in the glob, and simple
771 * backslashes into slashes in the disk, and pass these new strings as
772 * arguments instead of the originals.
774 gboolean windows_share = !(strncmp(disk, "\\\\", 2) || strchr(disk, '/'));
777 glob2 = convert_winglob_to_unix(glob);
778 disk2 = convert_unc_to_unix(disk);
779 g = (const char *) glob2;
780 d = (const char *) disk2;
783 result = match_word(g, d, '/');
786 * We can amfree(NULL), so this is "safe"
799 if (!isdigit((int)*(str++)))
807 const char * dateexp,
808 const char * datestamp)
811 size_t len, len_suffix;
813 char firstdate[100], lastdate[100];
817 if(strlen(dateexp) >= 100 || strlen(dateexp) < 1) {
821 /* strip and ignore an initial "^" */
822 if(dateexp[0] == '^') {
823 strncpy(mydateexp, dateexp+1, sizeof(mydateexp)-1);
824 mydateexp[sizeof(mydateexp)-1] = '\0';
827 strncpy(mydateexp, dateexp, sizeof(mydateexp)-1);
828 mydateexp[sizeof(mydateexp)-1] = '\0';
831 if(mydateexp[strlen(mydateexp)-1] == '$') {
833 mydateexp[strlen(mydateexp)-1] = '\0'; /* strip the trailing $ */
838 /* a single dash represents a date range */
839 if((dash = strchr(mydateexp,'-'))) {
840 if(match_exact == 1 || strchr(dash+1, '-')) {
844 /* format: XXXYYYY-ZZZZ, indicating dates XXXYYYY to XXXZZZZ */
846 len = (size_t)(dash - mydateexp); /* length of XXXYYYY */
847 len_suffix = strlen(dash) - 1; /* length of ZZZZ */
848 if (len_suffix > len) goto illegal;
849 len_prefix = len - len_suffix; /* length of XXX */
853 strncpy(firstdate, mydateexp, len);
854 firstdate[len] = '\0';
855 strncpy(lastdate, mydateexp, len_prefix);
856 strncpy(&(lastdate[len_prefix]), dash, len_suffix);
857 lastdate[len] = '\0';
858 if (!alldigits(firstdate) || !alldigits(lastdate))
860 if (strncmp(firstdate, lastdate, strlen(firstdate)) > 0)
862 return ((strncmp(datestamp, firstdate, strlen(firstdate)) >= 0) &&
863 (strncmp(datestamp, lastdate , strlen(lastdate)) <= 0));
866 if (!alldigits(mydateexp))
868 if(match_exact == 1) {
869 return (strcmp(datestamp, mydateexp) == 0);
872 return (strncmp(datestamp, mydateexp, strlen(mydateexp)) == 0);
876 error(_("Illegal datestamp expression %s"),dateexp);
883 const char * levelexp,
887 long int low, hi, level_i;
888 char mylevelexp[100];
891 if(strlen(levelexp) >= 100 || strlen(levelexp) < 1) {
892 error(_("Illegal level expression %s"),levelexp);
896 if(levelexp[0] == '^') {
897 strncpy(mylevelexp, levelexp+1, strlen(levelexp)-1);
898 mylevelexp[strlen(levelexp)-1] = '\0';
901 strncpy(mylevelexp, levelexp, strlen(levelexp));
902 mylevelexp[strlen(levelexp)] = '\0';
905 if(mylevelexp[strlen(mylevelexp)-1] == '$') {
907 mylevelexp[strlen(mylevelexp)-1] = '\0';
912 if((dash = strchr(mylevelexp,'-'))) {
913 if(match_exact == 1) {
918 if (!alldigits(mylevelexp) || !alldigits(dash+1)) goto illegal;
921 low = strtol(mylevelexp, (char **) NULL, 10);
922 if (errno) goto illegal;
923 hi = strtol(dash+1, (char **) NULL, 10);
924 if (errno) goto illegal;
925 level_i = strtol(level, (char **) NULL, 10);
926 if (errno) goto illegal;
928 return ((level_i >= low) && (level_i <= hi));
931 if (!alldigits(mylevelexp)) goto illegal;
932 if(match_exact == 1) {
933 return (strcmp(level, mylevelexp) == 0);
936 return (strncmp(level, mylevelexp, strlen(mylevelexp)) == 0);
940 error(_("Illegal level expression %s"),levelexp);