2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
4 * Copyright (c) 2007-2012 Zmanda, Inc. All Rights Reserved.
7 * Permission to use, copy, modify, distribute, and sell this software and its
8 * documentation for any purpose is hereby granted without fee, provided that
9 * the above copyright notice appear in all copies and that both that
10 * copyright notice and this permission notice appear in supporting
11 * documentation, and that the name of U.M. not be used in advertising or
12 * publicity pertaining to distribution of the software without specific,
13 * written prior permission. U.M. makes no representations about the
14 * suitability of this software for any purpose. It is provided "as is"
15 * without express or implied warranty.
17 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
19 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
21 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
22 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 * Authors: the Amanda Development Team. Its members are listed in a
25 * file named AUTHORS, in the root directory of this distribution.
28 * $Id: planner.c 10421 2008-03-06 18:48:30Z martineau $
30 * backup schedule planner for the Amanda backup system.
45 #include "amfeatures.h"
46 #include "server_util.h"
48 #include "timestamp.h"
51 #define planner_debug(i,x) do { \
52 if ((i) <= debug_planner) { \
/*
 * Planner tuning constants.
 *
 * MAX_LEVELS        - dimension of est_t.estimate[]; askfor() rejects a slot
 *                     index outside 0..MAX_LEVELS-1 and est_for_level()
 *                     scans exactly this many slots.
 * RUNS_REDZONE      - setup_estimate() compares the result of
 *                     when_overwrite() against this value to decide whether
 *                     to emit the "overwritten in %d run(s)" message.
 * PROMOTE_THRESHOLD - main() multiplies balanced_size by this to obtain
 *                     balance_threshold before promoting dumps.
 * DEFAULT_DUMPRATE  - NOTE(review): no use is visible in this excerpt.
 */
57 #define MAX_LEVELS 3 /* max# of estimates per filesys */
59 #define RUNS_REDZONE 5 /* should be in conf file? */
61 #define PROMOTE_THRESHOLD 0.05 /* if <5% unbalanced, don't promote */
62 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
64 /* configuration file stuff */
/*
 * Cached configuration values, loaded in main() through getconf_int64(),
 * getconf_int() and getconf_boolean().
 */
/* When > 0, main() uses this as tape_length instead of tape length * runtapes. */
67 gint64 conf_maxdumpsize;
/* 0 makes runs_per_cycle follow conf_dumpcycle; -1 makes main() call guess_runs_from_tapelist(). */
70 int conf_runspercycle;
/* 0 selects a datestamp (a supplied start time is cut at index 8); otherwise a full timestamp is used. */
75 int conf_usetimestamps;
/*
 * State markers for hosts (pointer-typed) and disks (integer-typed).
 * In the visible part of this file only DISK_READY is used: setup_estimate()
 * stores it as the initial est_t state.
 * NOTE(review): the users of the other values are not visible in this
 * excerpt - confirm their meaning against the estimate-gathering code.
 */
77 #define HOST_READY ((void *)0) /* must be 0 */
78 #define HOST_ACTIVE ((void *)1)
79 #define HOST_DONE ((void *)2)
81 #define DISK_READY 0 /* must be 0 */
83 #define DISK_PARTIALY_DONE 2
/*
 * One size estimate for a single dump level.
 * Besides the members shown here, askfor() also assigns "level" and
 * "dumpdate" members of this structure.  askfor() initializes nsize and
 * csize to -3; est_for_level() calls est_csize() to fill csize whenever
 * it is still <= -1.
 */
86 typedef struct one_est_s {
88 gint64 nsize; /* native size */
89 gint64 csize; /* compressed size */
91 int guessed; /* If server guessed the estimate size */
/*
 * Sentinel estimate.  setup_estimate() points dump_est and degr_est at it
 * initially, and est_for_level() returns its address when the requested
 * level is out of range or has no matching slot.
 * NOTE(review): the initializer presumably maps to level, nsize, csize,
 * dumpdate, guessed - the full member order is not visible here.
 */
93 static one_est_t default_one_est = {-1, -1, -1, "INVALID_DATE", 0};
/*
 * Per-disk planning state; setup_estimate() allocates one and hangs it on
 * disk_t.up (see the est() accessor).  Only some members are visible in
 * this excerpt.
 */
95 typedef struct est_s {
/* Up to MAX_LEVELS requested estimates, filled slot by slot through askfor(). */
101 one_est_t estimate[MAX_LEVELS];
/* Set by setup_estimate() from info->inf[0].csize. */
103 gint64 last_lev0size;
/* Set by setup_estimate() via perf_average() over info->full.rate / info->incr.rate. */
108 double fullrate, incrrate;
/* Set via perf_average() over info->full.comp / info->incr.comp, with dp->comprate[0] / [1] passed as second argument. */
109 double fullcomp, incrcomp;
/* Accessor: view the opaque disk_t.up pointer as the est_t that setup_estimate() stored there. */
115 #define est(dp) ((est_t *)(dp)->up)
117 /* pestq = partial estimate */
/*
 * Work queues used by main():
 *   startq - disks enqueued by setup_estimate();
 *   waitq, pestq, estq, failq - reset just before the estimate phase;
 *   estq is later drained through analyze_estimate() and failq through
 *   handle_failed();
 *   schedq - final schedule, drained through output_scheduleline().
 */
118 disklist_t startq, waitq, pestq, estq, failq, schedq;
/* balance_threshold is computed in main() as balanced_size * PROMOTE_THRESHOLD. */
120 double total_lev0, balanced_size, balance_threshold;
/* Tape block size from tapetype_get_blocksize(); main() multiplies it by 1024 for tt_blocksize. */
126 size_t tt_blocksize_kb;
/* Derived in main() from conf_runspercycle (0 -> conf_dumpcycle, -1 -> guess_runs_from_tapelist()). */
127 int runs_per_cycle = 0;
/* Run date/timestamp: taken from the --starttime argument or generated in main(); freed at the end of main(). */
129 char *planner_timestamp = NULL;
/* Feature set from am_init_feature_set() and its string form; both released at the end of main(). */
131 static am_feature_t *our_features = NULL;
132 static char *our_feature_string = NULL;
134 /* We keep a LIFO queue of before images for all modifications made
135 * to schedq in our attempt to make the schedule fit on the tape.
136 * Enough information is stored to reinstate a dump if it turns out
137 * that it shouldn't have been touched after all.
/*
 * Before-image record: the saved state of one schedq entry prior to a
 * modification, so the change can be undone.
 * NOTE(review): members on lines missing from this excerpt (e.g. list
 * links) are not shown.
 */
139 typedef struct bi_s {
142 int deleted; /* 0=modified, 1=deleted */
143 disk_t *dp; /* The disk that was changed */
144 int level; /* The original level */
145 gint64 nsize; /* The original native size */
146 gint64 csize; /* The original compressed size */
147 char *errstr; /* A message describing why this disk is here */
/* Container type for the before-image queue; its members are not visible in this excerpt. */
150 typedef struct bilist_s {
154 bilist_t biq; /* The BI queue itself */
157 * ========================================================================
/* Forward declarations for the planning phases driven by main(). */
/* Prepares the est_t for one disk, chooses which levels to request, and enqueues the disk on startq. */
162 static void setup_estimate(disk_t *dp);
/* NOTE(review): definition and call site are not visible in this excerpt. */
163 static void get_estimates(void);
/* Called by main() for every disk dequeued from estq. */
164 static void analyze_estimate(disk_t *dp);
/* Called by main() for every disk dequeued from failq. */
165 static void handle_failed(disk_t *dp);
/* NOTE(review): definition and call site are not visible in this excerpt. */
166 static void delay_dumps(void);
/* Return value is stored in moved_one; main() keeps calling while it is non-zero and the schedule is still unbalanced. */
167 static int promote_highest_priority_incremental(void);
/* Return value is stored in moved_one by main(). */
168 static int promote_hills(void);
/* Called by main() for every disk dequeued from schedq. */
169 static void output_scheduleline(disk_t *dp);
/* NOTE(review): definition and call site are not visible in this excerpt. */
170 static void server_estimate(disk_t *dp, int i, info_t *info, int level);
171 int main(int, char **);
187 times_t section_start;
191 config_overrides_t *cfg_ovr = NULL;
192 char *cfg_opt = NULL;
194 int exit_status = EXIT_SUCCESS;
195 gboolean no_taper = FALSE;
196 gboolean from_client = FALSE;
197 gboolean exact_match = FALSE;
199 if (argc > 1 && argv && argv[1] && g_str_equal(argv[1], "--version")) {
200 printf("planner-%s\n", VERSION);
205 * Configure program for internationalization:
206 * 1) Only set the message locale for now.
207 * 2) Set textdomain for all amanda related programs to "amanda"
208 * We don't want to be forced to support dozens of message catalogs.
210 setlocale(LC_MESSAGES, "C");
211 textdomain("amanda");
213 /* drop root privileges */
214 planner_setuid = set_root_privs(0);
218 set_pname("planner");
220 dbopen(DBG_SUBDIR_SERVER);
222 cfg_ovr = extract_commandline_config_overrides(&argc, &argv);
226 set_config_overrides(cfg_ovr);
227 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD, cfg_opt);
229 /* conf_diskfile is freed later, as it may be used in an error message */
230 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
231 read_diskfile(conf_diskfile, &origq);
232 disable_skip_disk(&origq);
234 /* Don't die when child closes pipe */
235 signal(SIGPIPE, SIG_IGN);
237 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
239 add_amanda_log_handler(amanda_log_stderr);
240 add_amanda_log_handler(amanda_log_trace_log);
242 if (!planner_setuid) {
243 error(_("planner must be run setuid root"));
246 if (config_errors(NULL) >= CFGERR_ERRORS) {
247 g_critical(_("errors processing config file"));
252 check_running_as(RUNNING_AS_ROOT | RUNNING_AS_UID_ONLY);
254 dbrename(get_config_name(), DBG_SUBDIR_SERVER);
257 section_start = curclock();
259 our_features = am_init_feature_set();
260 our_feature_string = am_feature_to_string(our_features);
262 log_add(L_INFO, "%s pid %ld", get_pname(), (long)getpid());
263 g_fprintf(stderr, _("%s: pid %ld executable %s version %s\n"),
264 get_pname(), (long) getpid(), argv[0], VERSION);
265 for (i = 0; version_info[i] != NULL; i++)
266 g_fprintf(stderr, _("%s: %s"), get_pname(), version_info[i]);
269 if (argc - diskarg_offset > 1 && strcmp(argv[diskarg_offset], "--starttime") == 0) {
270 planner_timestamp = stralloc(argv[diskarg_offset+1]);
273 if (argc - diskarg_offset > 0 && strcmp(argv[diskarg_offset], "--no-taper") == 0) {
277 if (argc - diskarg_offset > 0 && strcmp(argv[diskarg_offset], "--from-client") == 0) {
281 if (argc - diskarg_offset > 0 && g_str_equal(argv[diskarg_offset],
288 run_server_global_scripts(EXECUTE_ON_PRE_ESTIMATE, get_config_name());
291 * 1. Networking Setup
298 * 2. Read in Configuration Information
300 * All the Amanda configuration files are loaded before we begin.
303 g_fprintf(stderr,_("READING CONF INFO...\n"));
305 if(origq.head == NULL) {
306 error(_("empty disklist \"%s\""), conf_diskfile);
310 amfree(conf_diskfile);
312 conf_tapelist = config_dir_relative(getconf_str(CNF_TAPELIST));
313 if(read_tapelist(conf_tapelist)) {
314 error(_("could not load tapelist \"%s\""), conf_tapelist);
317 amfree(conf_tapelist);
319 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
320 if(open_infofile(conf_infofile)) {
321 error(_("could not open info db \"%s\""), conf_infofile);
324 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
325 log_add(L_WARNING, "problem copying infofile: %s", errstr);
328 amfree(conf_infofile);
330 conf_tapetype = getconf_str(CNF_TAPETYPE);
331 conf_maxdumpsize = getconf_int64(CNF_MAXDUMPSIZE);
332 conf_runtapes = getconf_int(CNF_RUNTAPES);
333 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
334 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
335 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
336 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
337 conf_reserve = getconf_int(CNF_RESERVE);
338 conf_autoflush = getconf_no_yes_all(CNF_AUTOFLUSH);
339 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
342 if (planner_timestamp) {
343 if (conf_usetimestamps == 0) {
344 planner_timestamp[8] = '\0';
346 } else if(conf_usetimestamps == 0) {
347 planner_timestamp = get_datestamp_from_time(0);
350 planner_timestamp = get_timestamp_from_time(0);
352 log_add(L_START, _("date %s"), planner_timestamp);
353 g_printf("DATE %s\n", planner_timestamp);
355 g_fprintf(stderr, _("%s: timestamp %s\n"),
356 get_pname(), planner_timestamp);
358 errstr = match_disklist(&origq, exact_match, argc-diskarg_offset,
359 argv+diskarg_offset);
361 g_fprintf(stderr,"%s",errstr);
362 exit_status = EXIT_FAILURE;
365 for (dp = origq.head; dp != NULL; dp = dp->next) {
368 if (!dp->dump_limit || !dp->dump_limit->same_host)
371 if (dp->dump_limit && !dp->dump_limit->server)
378 for (dp = origq.head; dp != NULL; dp = dp->next) {
380 qname = quote_string(dp->name);
381 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
389 error(_("no DLE to backup; %s"), errstr);
391 error(_("no DLE to backup"));
395 log_add(L_WARNING, "WARNING: %s", errstr);
399 /* some initializations */
401 if(conf_runspercycle == 0) {
402 runs_per_cycle = conf_dumpcycle;
403 } else if(conf_runspercycle == -1 ) {
404 runs_per_cycle = guess_runs_from_tapelist();
406 runs_per_cycle = conf_runspercycle;
408 if (runs_per_cycle <= 0) {
413 * do some basic sanity checking
415 if(conf_tapecycle <= runs_per_cycle) {
416 log_add(L_WARNING, _("tapecycle (%d) <= runspercycle (%d)"),
417 conf_tapecycle, runs_per_cycle);
420 tape = lookup_tapetype(conf_tapetype);
421 if(conf_maxdumpsize > (gint64)0) {
422 tape_length = conf_maxdumpsize;
423 g_fprintf(stderr, "planner: tape_length is set from maxdumpsize (%jd KB)\n",
424 (intmax_t)conf_maxdumpsize);
427 tape_length = tapetype_get_length(tape) * (gint64)conf_runtapes;
428 g_fprintf(stderr, "planner: tape_length is set from tape length (%jd KB) * runtapes (%d) == %jd KB\n",
429 (intmax_t)tapetype_get_length(tape),
431 (intmax_t)tape_length);
433 tape_mark = (size_t)tapetype_get_filemark(tape);
434 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
435 tt_blocksize = tt_blocksize_kb * 1024;
437 g_fprintf(stderr, _("%s: time %s: startup took %s secs\n"),
439 walltime_str(curclock()),
440 walltime_str(timessub(curclock(), section_start)));
443 * 3. Send autoflush dumps left on the holding disks
445 * This should give us something to do while we generate the new
449 g_fprintf(stderr,_("\nSENDING FLUSHES...\n"));
451 if(conf_autoflush && !no_taper) {
453 GSList *holding_list, *holding_file;
454 char *qdisk, *qhname;
456 /* get *all* flushable files in holding, without checking against
457 * the disklist (which may not contain some of the dumps) */
458 holding_list = holding_get_files_for_flush(NULL);
459 for(holding_file=holding_list; holding_file != NULL;
460 holding_file = holding_file->next) {
461 holding_file_get_dumpfile((char *)holding_file->data, &file);
463 if (holding_file_size((char *)holding_file->data, 1) <= 0) {
464 log_add(L_INFO, "%s: removing file with no data.",
465 (char *)holding_file->data);
466 holding_file_unlink((char *)holding_file->data);
467 dumpfile_free_data(&file);
471 /* see if this matches the command-line arguments */
472 if (conf_autoflush == 1 &&
473 !match_dumpfile(&file, exact_match, argc-diskarg_offset,
474 argv+diskarg_offset)) {
478 qdisk = quote_string(file.disk);
479 qhname = quote_string((char *)holding_file->data);
480 log_add(L_DISK, "%s %s", file.name, qdisk);
482 "FLUSH %s %s %s %d %s\n",
489 "FLUSH %s %s %s %d %s\n",
497 dumpfile_free_data(&file);
499 slist_free_full(holding_list, g_free);
502 g_fprintf(stderr, _("ENDFLUSH\n"));
503 g_fprintf(stdout, _("ENDFLUSH\n"));
507 * 4. Calculate Preliminary Dump Levels
509 * Before we can get estimates from the remote slave hosts, we make a
510 * first attempt at guessing what dump levels we will be dumping at
511 * based on the curinfo database.
514 g_fprintf(stderr,_("\nSETTING UP FOR ESTIMATES...\n"));
515 section_start = curclock();
517 startq.head = startq.tail = NULL;
518 while(!empty(origq)) {
519 disk_t *dp = dequeue_disk(&origq);
525 g_fprintf(stderr, _("%s: time %s: setting up estimates took %s secs\n"),
527 walltime_str(curclock()),
528 walltime_str(timessub(curclock(), section_start)));
532 * 5. Get Dump Size Estimates from Remote Client Hosts
534 * Each host is queried (in parallel) for dump size information on all
535 * of its disks, and the results gathered as they come in.
538 /* go out and get the dump estimates */
540 g_fprintf(stderr,_("\nGETTING ESTIMATES...\n"));
541 section_start = curclock();
543 estq.head = estq.tail = NULL;
544 pestq.head = pestq.tail = NULL;
545 waitq.head = waitq.tail = NULL;
546 failq.head = failq.tail = NULL;
550 g_fprintf(stderr, _("%s: time %s: getting estimates took %s secs\n"),
552 walltime_str(curclock()),
553 walltime_str(timessub(curclock(), section_start)));
556 * At this point, all disks with estimates are in estq, and
557 * all the disks on hosts that didn't respond to our inquiry
561 dump_queue("FAILED", failq, 15, stderr);
562 dump_queue("DONE", estq, 15, stderr);
565 exit_status = EXIT_FAILURE;
569 * 6. Analyze Dump Estimates
571 * Each disk's estimates are looked at to determine what level it
572 * should dump at, and to calculate the expected size and time taking
573 * historical dump rates and compression ratios into account. The
574 * total expected size is accumulated as well.
577 g_fprintf(stderr,_("\nANALYZING ESTIMATES...\n"));
578 section_start = curclock();
580 /* an empty tape still has a label and an endmark */
581 total_size = ((gint64)tt_blocksize_kb + (gint64)tape_mark) * (gint64)2;
585 schedq.head = schedq.tail = NULL;
586 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
587 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
589 run_server_global_scripts(EXECUTE_ON_POST_ESTIMATE, get_config_name());
592 * At this point, all the disks are on schedq sorted by priority.
593 * The total estimated size of the backups is in total_size.
599 g_fprintf(stderr, _("INITIAL SCHEDULE (size %lld):\n"),
600 (long long)total_size);
601 for(dp = schedq.head; dp != NULL; dp = dp->next) {
602 qname = quote_string(dp->name);
603 g_fprintf(stderr, _(" %s %s pri %d lev %d nsize %lld csize %lld\n"),
604 dp->host->hostname, qname, est(dp)->dump_priority,
605 est(dp)->dump_est->level,
606 (long long)est(dp)->dump_est->nsize,
607 (long long)est(dp)->dump_est->csize);
614 * 7. Delay Dumps if Schedule Too Big
616 * If the generated schedule is too big to fit on the tape, we need to
617 * delay some full dumps to make room. Incrementals will be done
618 * instead (except for new or forced disks).
620 * In extreme cases, delaying all the full dumps is not even enough.
621 * If so, some low-priority incrementals will be skipped completely
622 * until the dumps fit on the tape.
625 g_fprintf(stderr, _("\nDELAYING DUMPS IF NEEDED, total_size %lld, tape length %lld mark %zu\n"),
626 (long long)total_size,
627 (long long)tape_length,
630 initial_size = total_size;
634 /* XXX - why bother checking this? */
635 if(empty(schedq) && total_size < initial_size) {
636 error(_("cannot fit anything on tape, bailing out"));
642 * 8. Promote Dumps if Schedule Too Small
644 * Amanda attempts to balance the full dumps over the length of the
645 * dump cycle. If this night's full dumps are too small relative to
646 * the other nights, promote some high-priority full dumps that will be
647 * due for the next run, to full dumps for tonight, taking care not to
648 * overflow the tape size.
650 * This doesn't work too well for small sites. For these we scan ahead
651 * looking for nights that have an excessive number of dumps and promote
654 * Amanda never delays full dumps just for the sake of balancing the
655 * schedule, so it can take a full cycle to balance the schedule after
660 _("\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n"),
661 total_lev0, balanced_size);
663 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
665 while((balanced_size - total_lev0) > balance_threshold && moved_one)
666 moved_one = promote_highest_priority_incremental();
668 moved_one = promote_hills();
670 g_fprintf(stderr, _("%s: time %s: analysis took %s secs\n"),
672 walltime_str(curclock()),
673 walltime_str(timessub(curclock(), section_start)));
679 * The schedule goes to stdout, presumably to driver. A copy is written
680 * on stderr for the debug file.
683 g_fprintf(stderr,_("\nGENERATING SCHEDULE:\n--------\n"));
685 exit_status = EXIT_FAILURE;
686 g_fprintf(stderr, _("--> Generated empty schedule! <--\n"));
688 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
690 g_fprintf(stderr, _("--------\n"));
693 log_add(L_FINISH, _("date %s time %s"), planner_timestamp, walltime_str(curclock()));
694 log_add(L_INFO, "pid-done %ld", (long)getpid());
697 amfree(planner_timestamp);
698 amfree(our_feature_string);
699 am_release_feature_set(our_features);
710 * ========================================================================
711 * SETUP FOR ESTIMATES
/* Helpers used while setting up estimates. */
/* Fills estimate slot (2nd arg) of the est_t for the dump level given as 3rd arg; level -1 marks an unused slot. */
715 static void askfor(est_t *, int, int, info_t *);
/* Level of the last successful dump to tape; returns info->last_level directly when it is not -1. */
716 static int last_level(info_t *info); /* subroutines */
/* Returns the estimate slot matching level, or &default_one_est when there is none. */
717 static one_est_t *est_for_level(disk_t *dp, int level);
/* Computes one_est->csize from one_est->nsize and the disk's compression settings. */
718 static void est_csize(disk_t *dp, one_est_t *one_est);
/* Returns the csize of the estimate selected by est_for_level(). */
719 static gint64 est_tape_size(disk_t *dp, int level);
/* When the next level 0 is due: 0 = today, 1 = tomorrow, negative = overdue. */
720 static int next_level0(disk_t *dp, info_t *info);
/* Number of runs at level lev; 0 when lev is not the last dumped level. */
721 static int runs_at(info_t *info, int lev);
722 static gint64 bump_thresh(int level, gint64 size_level_0, int bumppercent, gint64 bumpsize, double bumpmult);
/* Number of runs until the tape with this label is overwritten (0 = this run). */
723 static int when_overwrite(char *label);
/*
 * askfor: record in ep->estimate[seq] which dump level an estimate is
 * wanted for.  Callers in setup_estimate() pass lev == -1 to mark the
 * remaining slots as unused.
 */
726 est_t *ep, /* estimate data block */
727 int seq, /* sequence number of request */
728 int lev, /* dump level being requested */
729 info_t *info) /* info block for disk */
/* seq indexes estimate[], so it must lie in 0..MAX_LEVELS-1 */
731 if(seq < 0 || seq >= MAX_LEVELS) {
732 error(_("error [planner askfor: seq out of range 0..%d: %d]"),
/* lev must be -1 or a level below DUMP_LEVELS */
736 if(lev < -1 || lev >= DUMP_LEVELS) {
737 error(_("error [planner askfor: lev out of range -1..%d: %d]"),
/*
 * Unused-slot case: level -1 and no dumpdate.
 * NOTE(review): the condition choosing between this group and the next one
 * is on lines missing from this excerpt - presumably lev == -1; confirm.
 */
743 ep->estimate[seq].level = -1;
744 ep->estimate[seq].dumpdate = (char *)0;
745 ep->estimate[seq].nsize = (gint64)-3;
746 ep->estimate[seq].csize = (gint64)-3;
747 ep->estimate[seq].guessed = 0;
/* Requested-level case: remember the level and a stralloc() copy of its dumpdate from the info record. */
751 ep->estimate[seq].level = lev;
753 ep->estimate[seq].dumpdate = stralloc(get_dumpdate(info,lev));
/* Both sizes start at -3 in either case, and guessed starts at 0. */
755 ep->estimate[seq].nsize = (gint64)-3;
756 ep->estimate[seq].csize = (gint64)-3;
757 ep->estimate[seq].guessed = 0;
772 assert(dp && dp->host);
774 qname = quote_string(dp->name);
775 g_fprintf(stderr, _("%s: time %s: setting up estimates for %s:%s\n"),
776 get_pname(), walltime_str(curclock()),
777 dp->host->hostname, qname);
779 /* get current information about disk */
781 info = g_new0(info_t, 1);
782 if(get_info(dp->host->hostname, dp->name, info)) {
783 /* no record for this disk, make a note of it */
784 log_add(L_INFO, _("Adding new disk %s:%s."), dp->host->hostname, qname);
787 if (dp->data_path == DATA_PATH_DIRECTTCP) {
788 if (dp->compress != COMP_NONE) {
789 log_add(L_FAIL, _("%s %s %s 0 [Can't compress directtcp data-path]"),
790 dp->host->hostname, qname, planner_timestamp);
791 g_fprintf(stderr,_("%s:%s lev 0 skipped can't compress directtcp data-path\n"),
792 dp->host->hostname, qname);
796 if (dp->encrypt != ENCRYPT_NONE) {
797 log_add(L_FAIL, _("%s %s %s 0 [Can't encrypt directtcp data-path]"),
798 dp->host->hostname, qname, planner_timestamp);
799 g_fprintf(stderr,_("%s:%s lev 0 skipped can't encrypt directtcp data-path\n"),
800 dp->host->hostname, qname);
804 if (dp->to_holdingdisk == HOLD_REQUIRED) {
805 log_add(L_FAIL, _("%s %s %s 0 [Holding disk can't be use for directtcp data-path]"),
806 dp->host->hostname, qname, planner_timestamp);
807 g_fprintf(stderr,_("%s:%s lev 0 skipped Holding disk can't be use for directtcp data-path\n"),
808 dp->host->hostname, qname);
811 } else if (dp->to_holdingdisk == HOLD_AUTO) {
812 g_fprintf(stderr,_("%s:%s Disabling holding disk\n"),
813 dp->host->hostname, qname);
814 dp->to_holdingdisk = HOLD_NEVER;
818 /* setup working data struct for disk */
820 ep = alloc(SIZEOF(est_t));
821 dp->up = (void *) ep;
823 ep->state = DISK_READY;
824 ep->dump_priority = dp->priority;
828 ep->degr_mesg = NULL;
829 ep->dump_est = &default_one_est;
830 ep->degr_est = &default_one_est;
832 /* calculated fields */
834 if (ISSET(info->command, FORCE_FULL)) {
835 /* force a level 0, kind of like a new disk */
836 if(dp->strategy == DS_NOFULL) {
838 * XXX - Not sure what it means to force a no-full disk. The
839 * purpose of no-full is to just dump changes relative to a
840 * stable base, for example root partitions that vary only
841 * slightly from a site-wide prototype. Only the variations
844 * If we allow a level 0 onto the Amanda cycle, then we are
845 * hosed when that tape gets re-used next. Disallow this for
849 _("Cannot force full dump of %s:%s with no-full option."),
850 dp->host->hostname, qname);
852 /* clear force command */
853 CLR(info->command, FORCE_FULL);
854 ep->last_level = last_level(info);
855 ep->next_level0 = next_level0(dp, info);
856 } else if (dp->strategy == DS_INCRONLY) {
858 _("Cannot force full dump of %s:%s with incronly option."),
859 dp->host->hostname, qname);
861 /* clear force command */
862 CLR(info->command, FORCE_FULL);
863 ep->last_level = last_level(info);
864 ep->next_level0 = next_level0(dp, info);
866 ep->degr_mesg = _("Skipping: force-full disk can't be dumped in degraded mode");
868 ep->next_level0 = -conf_dumpcycle;
869 log_add(L_INFO, _("Forcing full dump of %s:%s as directed."),
870 dp->host->hostname, qname);
873 else if(dp->strategy == DS_NOFULL) {
874 /* force estimate of level 1 */
876 ep->next_level0 = next_level0(dp, info);
879 ep->last_level = last_level(info);
880 ep->next_level0 = next_level0(dp, info);
883 /* adjust priority levels */
885 /* warn if dump will be overwritten */
886 if (ep->last_level > -1 && strlen(info->inf[0].label) > 0) {
887 overwrite_runs = when_overwrite(info->inf[0].label);
888 if(overwrite_runs == 0) {
889 log_add(L_WARNING, _("Last full dump of %s:%s "
890 "on tape %s overwritten on this run."),
891 dp->host->hostname, qname, info->inf[0].label);
892 } else if(overwrite_runs <= RUNS_REDZONE) {
894 plural(_("Last full dump of %s:%s on tape %s overwritten in %d run."),
895 _("Last full dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
896 dp->host->hostname, qname, info->inf[0].label,
901 /* warn if last level 1 will be overwritten */
902 if (ep->last_level > 1 && strlen(info->inf[1].label) > 0) {
903 overwrite_runs = when_overwrite(info->inf[1].label);
904 if(overwrite_runs == 0) {
905 log_add(L_WARNING, _("Last level 1 dump of %s:%s "
906 "on tape %s overwritten on this run, resetting to level 1"),
907 dp->host->hostname, qname, info->inf[1].label);
909 } else if(overwrite_runs <= RUNS_REDZONE) {
911 plural(_("Last level 1 dump of %s:%s on tape %s overwritten in %d run."),
912 _("Last level 1 dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
913 dp->host->hostname, qname, info->inf[1].label,
918 if(ep->next_level0 < 0) {
919 g_fprintf(stderr,plural(_("%s:%s overdue %d day for level 0\n"),
920 _("%s:%s overdue %d days for level 0\n"),
922 dp->host->hostname, qname, (-ep->next_level0));
923 ep->dump_priority -= ep->next_level0;
925 else if (ISSET(info->command, FORCE_FULL))
926 ep->dump_priority += 1;
927 /* else XXX bump up the priority of incrementals that failed last night */
929 /* handle external level 0 dumps */
931 if(dp->skip_full && dp->strategy != DS_NOINC) {
932 if(ep->next_level0 <= 0) {
933 /* update the date field */
934 info->inf[0].date = today;
935 CLR(info->command, FORCE_FULL);
936 ep->next_level0 += conf_dumpcycle;
938 if(put_info(dp->host->hostname, dp->name, info)) {
939 error(_("could not put info record for %s:%s: %s"),
940 dp->host->hostname, qname, strerror(errno));
943 log_add(L_INFO, _("Skipping full dump of %s:%s today."),
944 dp->host->hostname, qname);
945 g_fprintf(stderr,_("%s:%s lev 0 skipped due to skip-full flag\n"),
946 dp->host->hostname, qname);
947 /* don't enqueue the disk */
948 askfor(ep, 0, -1, info);
949 askfor(ep, 1, -1, info);
950 askfor(ep, 2, -1, info);
951 g_fprintf(stderr, _("%s: SKIPPED %s %s 0 [skip-full]\n"),
952 get_pname(), dp->host->hostname, qname);
953 log_add(L_SUCCESS, _("%s %s %s 0 [skipped: skip-full]"),
954 dp->host->hostname, qname, planner_timestamp);
959 if(ep->last_level == -1) {
960 /* probably a new disk, but skip-full means no full! */
964 if(ep->next_level0 == 1) {
965 log_add(L_WARNING, _("Skipping full dump of %s:%s tomorrow."),
966 dp->host->hostname, qname);
970 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info->command, FORCE_FULL)) {
971 /* don't enqueue the disk */
972 askfor(ep, 0, -1, info);
973 askfor(ep, 1, -1, info);
974 askfor(ep, 2, -1, info);
975 log_add(L_FAIL, _("%s %s 19000101 1 [Skipping incronly because no full dump were done]"),
976 dp->host->hostname, qname);
977 g_fprintf(stderr,_("%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n"),
978 dp->host->hostname, qname);
983 /* handle "skip-incr" type archives */
985 if(dp->skip_incr && ep->next_level0 > 0) {
986 g_fprintf(stderr,_("%s:%s lev 1 skipped due to skip-incr flag\n"),
987 dp->host->hostname, qname);
988 /* don't enqueue the disk */
989 askfor(ep, 0, -1, info);
990 askfor(ep, 1, -1, info);
991 askfor(ep, 2, -1, info);
993 g_fprintf(stderr, _("%s: SKIPPED %s %s 1 [skip-incr]\n"),
994 get_pname(), dp->host->hostname, qname);
996 log_add(L_SUCCESS, _("%s %s %s 1 [skipped: skip-incr]"),
997 dp->host->hostname, qname, planner_timestamp);
1002 if( ep->last_level == -1 && ep->next_level0 > 0 &&
1003 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
1004 conf_reserve == 100) {
1005 log_add(L_WARNING, _("%s:%s mismatch: no tapelist record, "
1006 "but curinfo next_level0: %d."),
1007 dp->host->hostname, qname, ep->next_level0);
1008 ep->next_level0 = 0;
1011 //if(ep->last_level == 0) ep->level_days = 0;
1012 //else ep->level_days = runs_at(info, ep->last_level);
1013 ep->level_days = runs_at(info, ep->last_level);
1014 ep->last_lev0size = info->inf[0].csize;
1016 ep->fullrate = perf_average(info->full.rate, 0.0);
1017 ep->incrrate = perf_average(info->incr.rate, 0.0);
1019 ep->fullcomp = perf_average(info->full.comp, dp->comprate[0]);
1020 ep->incrcomp = perf_average(info->incr.comp, dp->comprate[1]);
1022 /* determine which estimates to get */
1026 if (dp->strategy == DS_NOINC ||
1028 (!ISSET(info->command, FORCE_BUMP) ||
1030 ep->last_level == -1))) {
1031 if(ISSET(info->command, FORCE_BUMP) && ep->last_level == -1) {
1033 _("Remove force-bump command of %s:%s because it's a new disk."),
1034 dp->host->hostname, qname);
1036 switch (dp->strategy) {
1039 askfor(ep, i++, 0, info);
1040 if (ep->last_level == -1)
1041 ep->degr_mesg = _("Skipping: new disk can't be dumped in degraded mode");
1043 ep->degr_mesg = _("Skipping: strategy NOINC can't be dumped in degraded mode");
1045 log_add(L_INFO, _("Ignoring skip-full for %s:%s "
1046 "because the strategy is NOINC."),
1047 dp->host->hostname, qname);
1049 if(ISSET(info->command, FORCE_BUMP)) {
1051 _("Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC."),
1052 dp->host->hostname, qname);
1061 if (ISSET(info->command, FORCE_FULL))
1067 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
1068 if(ep->last_level == -1) { /* a new disk */
1069 if (ep->degr_mesg == NULL)
1070 ep->degr_mesg = _("Skipping: new disk can't be dumped in degraded mode");
1071 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
1072 askfor(ep, i++, 1, info);
1074 assert(!dp->skip_full); /* should be handled above */
1076 } else { /* not new, pick normally */
1079 curr_level = ep->last_level;
1081 if (ISSET(info->command, FORCE_NO_BUMP)) {
1082 if(curr_level > 0) { /* level 0 already asked for */
1083 askfor(ep, i++, curr_level, info);
1085 log_add(L_INFO,_("Preventing bump of %s:%s as directed."),
1086 dp->host->hostname, qname);
1087 ep->degr_mesg = _("Skipping: force-no-bump disk can't be dumped in degraded mode");
1088 } else if (ISSET(info->command, FORCE_BUMP)
1089 && curr_level + 1 < DUMP_LEVELS) {
1090 askfor(ep, i++, curr_level+1, info);
1091 log_add(L_INFO,_("Bumping of %s:%s at level %d as directed."),
1092 dp->host->hostname, qname, curr_level+1);
1093 ep->degr_mesg = _("Skipping: force-bump disk can't be dumped in degraded mode");
1094 } else if (curr_level == 0) {
1095 askfor(ep, i++, 1, info);
1097 askfor(ep, i++, curr_level, info);
1099 * If last time we dumped less than the threshold, then this
1100 * time we will too, OR the extra size will be charged to both
1101 * cur_level and cur_level + 1, so we will never bump. Also,
1102 * if we haven't been at this level 2 days, or the dump failed
1103 * last night, we can't bump.
1105 if((info->inf[curr_level].size == (gint64)0 || /* no data, try it anyway */
1106 (((info->inf[curr_level].size > bump_thresh(curr_level, info->inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
1107 && ep->level_days >= dp->bumpdays))
1108 && curr_level + 1 < DUMP_LEVELS) {
1109 askfor(ep, i++, curr_level+1, info);
1115 while(i < MAX_LEVELS) /* mark end of estimates */
1116 askfor(ep, i++, -1, info);
1120 g_fprintf(stderr, _("setup_estimate: %s:%s: command %u, options: %s "
1121 "last_level %d next_level0 %d level_days %d getting estimates "
1122 "%d (%lld) %d (%lld) %d (%lld)\n"),
1123 dp->host->hostname, qname, info->command,
1124 dp->strategy == DS_NOFULL ? "no-full" :
1125 dp->strategy == DS_INCRONLY ? "incr-only" :
1126 dp->skip_full ? "skip-full" :
1127 dp->skip_incr ? "skip-incr" : "none",
1128 ep->last_level, ep->next_level0, ep->level_days,
1129 ep->estimate[0].level, (long long)ep->estimate[0].nsize,
1130 ep->estimate[1].level, (long long)ep->estimate[1].nsize,
1131 ep->estimate[2].level, (long long)ep->estimate[2].nsize);
1133 assert(ep->estimate[0].level != -1);
1134 enqueue_disk(&startq, dp);
/*
 * when_overwrite: number of runs until the tape carrying "label" is
 * overwritten.  setup_estimate() treats 0 as "overwritten on this run" and
 * warns for any value up to RUNS_REDZONE.
 */
1138 static int when_overwrite(
/* Treat a runtapes setting of 0 as 1 so the divisions below are well defined. */
1144 runtapes = conf_runtapes;
1145 if(runtapes == 0) runtapes = 1;
1147 if((tp = lookup_tapelabel(label)) == NULL)
1148 return 1; /* "shouldn't happen", but trigger warning message */
/* NOTE(review): the value returned for a tape with reuse == 0 is on a line missing from this excerpt. */
1149 else if(tp->reuse == 0)
/* More tapes known than conf_tapecycle: measure the distance against the real tape count. */
1151 else if(lookup_nb_tape() > conf_tapecycle)
1152 return (lookup_nb_tape() - tp->position) / runtapes;
/* Otherwise measure against the configured tapecycle. */
1154 return (conf_tapecycle - tp->position) / runtapes;
1157 /* Return the estimated size for a particular dump */
/*
 * Looks through the disk's MAX_LEVELS estimate slots for one whose level
 * equals "level".  A level outside 0..DUMP_LEVELS-1, or no matching slot,
 * yields the shared sentinel &default_one_est.
 */
1165 if (level < 0 || level >= DUMP_LEVELS)
1166 return &default_one_est;
1168 for (i = 0; i < MAX_LEVELS; i++) {
1169 if (level == est(dp)->estimate[i].level) {
/* csize not computed yet (still <= -1): derive it from nsize before handing the slot out. */
1170 if (est(dp)->estimate[i].csize <= -1) {
1171 est_csize(dp, &est(dp)->estimate[i]);
1173 return &est(dp)->estimate[i];
1176 return &default_one_est;
1179 /* Return the estimated on-tape size of a particular dump */
/*
 * Stores the result in one_est->csize.  Starting from one_est->nsize:
 *  - for an uncompressed disk (COMP_NONE) csize is simply nsize;
 *  - otherwise the size is scaled by the disk's historical compression
 *    ratio (fullcomp for level 0, incrcomp for any other level), with the
 *    ratio capped at 1.1.
 * NOTE(review): the statement executed when the scaled size is <= 0 is on a
 * line missing from this excerpt.
 */
1185 gint64 size = one_est->nsize;
1188 if (dp->compress == COMP_NONE) {
1189 one_est->csize = one_est->nsize;
1193 if (one_est->level == 0) ratio = est(dp)->fullcomp;
1194 else ratio = est(dp)->incrcomp;
1197 * make sure over-inflated compression ratios don't throw off the
1198 * estimates, this is mostly for when you have a small dump getting
1199 * compressed which takes up alot more disk/tape space relatively due
1200 * to the overhead of the compression. This is specifically for
1201 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1202 * (RUG@USM.Uni-Muenchen.DE)
1205 if (ratio > 1.1) ratio = 1.1;
1207 size = (gint64)((double)size * ratio);
1210 * Ratio can be very small in some error situations, so make sure
1211 * size goes back greater than zero. It may not be right, but
1212 * indicates we did get an estimate.
1214 if (size <= (gint64)0) {
1218 one_est->csize = size;
/*
 * est_tape_size: compressed (on-tape) size of the estimate for "level",
 * as found by est_for_level().
 */
1221 static gint64 est_tape_size(
1225 one_est_t *dump_est;
1227 dump_est = est_for_level(dp, level);
/* Only compute csize for an estimate with a non-negative level whose csize is still <= -1. */
1228 if (dump_est->level >= 0 && dump_est->csize <= -1)
1229 est_csize(dp, dump_est);
1230 return dump_est->csize;
1234 /* what was the level of the last successful dump to tape? */
/*
 * Returns info->last_level directly when it is already known (not -1).
 * Otherwise the level is reconstructed from the per-level history in
 * info->inf[], skipping entries dated before the level 0 entry:
 *  - when conf_reserve is below 100, entries are compared by date and the
 *    newest one is tracked in last_date;
 *  - on the remaining path the tape of each entry is looked up and the entry
 *    with the smallest tp->position is tracked in min_pos; entries whose
 *    label is unknown are skipped.
 * The assignments that record the winning level and the final return are not
 * visible in this extract.
 */
1235 static int last_level(
1238 int min_pos, min_level, i;
1239 time_t lev0_date, last_date;
1242 if(info->last_level != -1)
1243 return info->last_level;
1245 /* to keep compatibility with old infofile */
1246 min_pos = 1000000000;
/* NOTE(review): the bound 9 is hard-coded; presumably it should track DUMP_LEVELS -- confirm */
1250 for(i = 0; i < 9; i++) {
1251 if(conf_reserve < 100) {
1252 if(i == 0) lev0_date = info->inf[0].date;
1253 else if(info->inf[i].date < lev0_date) continue;
1254 if(info->inf[i].date > last_date) {
1255 last_date = info->inf[i].date;
1260 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1261 /* cull any entries from previous cycles */
1262 if(i == 0) lev0_date = info->inf[0].date;
1263 else if(info->inf[i].date < lev0_date) continue;
1265 if(tp->position < min_pos) {
1266 min_pos = tp->position;
/*
 * NOTE(review): this caches the loop index i rather than min_level.  If the
 * statement sits after the loop, i equals the loop bound here -- confirm
 * against the full source.
 */
1271 info->last_level = i;
1275 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc*/
/*
 * Result by case:
 *  - DS_NOFULL and DS_INCRONLY disks never get a scheduled full, so 1 is
 *    returned as a stand-in value;
 *  - DS_NOINC: the returned value is not visible in this extract;
 *  - a negative level 0 date marks a new disk: the result is the negated
 *    number of days between EPOCH and today;
 *  - otherwise: dp->dumpcycle minus the days elapsed since the last level 0,
 *    so a result of zero or less means the full is due or overdue.
 *
 * NOTE(review): the function header is not visible in this extract; the name
 * next_level0 is inferred from the ep->next_level0 field -- confirm.
 */
1281 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1282 return 1; /* fake it */
1283 else if (dp->strategy == DS_NOINC)
1285 else if(info->inf[0].date < (time_t)0)
1286 return -days_diff(EPOCH, today); /* new disk */
1288 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1291 /* how many runs at current level? */
/*
 * Returns 0 when lev is not the level reported by last_level(info).
 * Otherwise returns the cached info->consecutive_runs when it is set (not
 * -1), and 1 for level 0.
 *
 * For records without the cached counter, the count is derived from the tape
 * list: the difference between the positions of the tapes holding level
 * lev-1 and level lev, divided by the tapes used per run.  Either tape being
 * unknown gives 0.  The derived value is cached in info->consecutive_runs.
 *
 * NOTE(review): the function header and the final return are not visible in
 * this extract -- confirm the name and return value against the full source.
 */
1296 tape_t *cur_tape, *old_tape;
1299 last = last_level(info);
1300 if(lev != last) return 0;
1301 if(info->consecutive_runs != -1)
1302 return info->consecutive_runs;
1303 if(lev == 0) return 1;
1305 /* to keep compatibility with old infofile */
1306 cur_tape = lookup_tapelabel(info->inf[lev].label);
1307 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1308 if(cur_tape == NULL || old_tape == NULL) return 0;
/* conf_runtapes of 0 is treated as one tape per run to avoid dividing by zero */
1310 if(conf_runtapes == 0)
1311 nb_runs = (old_tape->position - cur_tape->position) / 1;
1313 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1314 info->consecutive_runs = nb_runs;
/*
 * bump_thresh: compute the size threshold used when deciding whether an
 * incremental should be bumped from the given level to the next one.
 *
 * The base threshold is bumppercent percent of size_level_0 when bumppercent
 * is non-zero and the level 0 size exceeds 1024; otherwise it is bumpsize.
 * The base is then multiplied by bumpmult once for every level above 1, and
 * the result is truncated to gint64.
 *
 * NOTE(review): only size_level_0 is visible in the parameter list of this
 * extract; the other names are taken from the body and from the call in
 * pick_inclevel().
 */
1320 static gint64 bump_thresh(
1322 gint64 size_level_0,
1329 if ((bumppercent != 0) && (size_level_0 > (gint64)1024)) {
1330 bump = ((double)size_level_0 * (double)bumppercent) / 100.0;
1333 bump = (double)bumpsize;
/*
 * NOTE(review): the pre-decrement test only terminates normally for
 * level >= 1; a level of 0 or less would count down through negative
 * values.  The visible caller returns early for level 0 -- confirm that no
 * other caller can pass a level below 1.
 */
1335 while(--level) bump = bump * bumpmult;
1337 return (gint64)bump;
1343 * ========================================================================
1344 * GET REMOTE DUMP SIZE ESTIMATES
/* build and send the next estimate (or noop) request for one host */
1348 static void getsize(am_host_t *hostp);
/* find the disk of host hp whose name equals str */
1349 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
/* reply callback passed to protocol_sendreq() by getsize() */
1350 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
/*
 * get_estimates: drive the estimate phase for the disks queued on startq.
 *
 * Visible flow:
 *  1. startq is scanned repeatedly; for each host found in state HOST_READY
 *     the pre-host-estimate scripts and the pre-DLE-estimate scripts of its
 *     disks are run.  The scan restarts whenever a host was started because
 *     the disk may have left startq.  The statements between the scripts and
 *     the restart are not visible in this extract.
 *  2. Any disk still sitting on waitq afterwards is failed with a fixed
 *     error string.
 *  3. Every disk on pestq (partial results) has its unanswered levels
 *     disabled by setting level to -1; a size of -3 is reported as a
 *     timeout.  The disk goes to estq if at least one level has a positive
 *     size, otherwise to failq.
 */
1353 static void get_estimates(void)
1357 int something_started;
1359 something_started = 1;
1360 while(something_started) {
1361 something_started = 0;
1362 for(dp = startq.head; dp != NULL; dp = dp->next) {
1364 if(hostp->up == HOST_READY) {
1365 something_started = 1;
1366 run_server_host_scripts(EXECUTE_ON_PRE_HOST_ESTIMATE,
1367 get_config_name(), hostp);
1368 for(dp1 = hostp->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1370 run_server_dle_scripts(EXECUTE_ON_PRE_DLE_ESTIMATE,
1371 get_config_name(), dp1,
1372 est(dp1)->estimate[0].level);
1377 * dp is no longer on startq, so dp->next is not valid
1378 * and we have to start all over.
/* disks left on waitq never received a reply: fail them */
1386 while(!empty(waitq)) {
1387 disk_t *dp = dequeue_disk(&waitq);
1388 est(dp)->errstr = _("hmm, disk was stranded on waitq");
1389 enqueue_disk(&failq, dp);
/* disks on pestq received only part of their estimates */
1392 while(!empty(pestq)) {
1393 disk_t *dp = dequeue_disk(&pestq);
1394 char * qname = quote_string(dp->name);
1397 for (i=0; i < MAX_LEVELS; i++) {
1398 if (est(dp)->estimate[i].level != -1 &&
1399 est(dp)->estimate[i].nsize < (gint64)0) {
1400 if (est(dp)->estimate[i].nsize == (gint64)-3) {
1402 _("disk %s:%s, estimate of level %d timed out."),
1403 dp->host->hostname, qname, est(dp)->estimate[i].level);
1405 est(dp)->estimate[i].level = -1;
/*
 * NOTE(review): slots 0..2 are tested explicitly, which matches
 * MAX_LEVELS == 3; the test must be extended if MAX_LEVELS changes.
 */
1409 if ((est(dp)->estimate[0].level != -1 &&
1410 est(dp)->estimate[0].nsize > (gint64)0) ||
1411 (est(dp)->estimate[1].level != -1 &&
1412 est(dp)->estimate[1].nsize > (gint64)0) ||
1413 (est(dp)->estimate[2].level != -1 &&
1414 est(dp)->estimate[2].nsize > (gint64)0)) {
1415 enqueue_disk(&estq, dp);
1418 est(dp)->errstr = vstralloc("disk ", qname,
1419 _(", all estimate timed out"), NULL);
1420 enqueue_disk(&failq, dp);
/*
 * getsize: build and send the next request to one client host.
 *
 * While hostp->features is still NULL a noop request is sent, with
 * CNF_CTIMEOUT as its timeout, to learn the feature list of the client.
 * Once the features are known a sendsize request is built:
 *  - the OPTIONS header carries only the fields the client advertises
 *    (features, maxdumps, hostname, config);
 *  - each disk with todo set and state DISK_READY contributes one request
 *    fragment, in XML form when the client has fe_req_xml, otherwise in the
 *    older line format, which is refused for programs other than DUMP and
 *    GNUTAR;
 *  - disks whose estimatelist contains ES_SERVER get their sizes from
 *    server_estimate() here;
 *  - disks failing validate_optionstr() are logged with L_FAIL and marked
 *    DISK_DONE.
 * If no estimate ends up being requested the host is marked HOST_DONE.
 * Otherwise the host becomes HOST_ACTIVE, every DISK_ACTIVE disk is queued
 * on waitq and the request is handed to protocol_sendreq() with
 * handle_result() as the reply callback.
 *
 * Several statements are missing from this extract, so the description
 * covers only what is visible.
 */
1426 static void getsize(
1429 char number[NUM_STR_SIZE], *req;
1432 time_t estimates, timeout;
1434 const security_driver_t *secdrv;
1436 char * qname, *b64disk = NULL;
1437 char * qdevice, *b64device = NULL;
1438 estimate_t estimate;
1441 assert(hostp->disks != NULL);
/* only a host in state HOST_READY gets a new request */
1443 if(hostp->up != HOST_READY) {
1448 * The first time through here we send a "noop" request. This will
1449 * return the feature list from the client if it supports that.
1450 * If it does not, handle_result() will set the feature list to an
1451 * empty structure. In either case, we do the disks on the second
1452 * (and subsequent) pass(es).
1454 if(hostp->features != NULL) { /* sendsize service */
1458 int has_features = am_has_feature(hostp->features,
1459 fe_req_options_features);
1460 int has_hostname = am_has_feature(hostp->features,
1461 fe_req_options_hostname);
1462 int has_maxdumps = am_has_feature(hostp->features,
1463 fe_req_options_maxdumps);
1464 int has_config = am_has_feature(hostp->features,
1465 fe_req_options_config);
/* request header: each OPTIONS field is emitted only if the client supports it */
1467 g_snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
1468 req = vstralloc("SERVICE ", "sendsize", "\n",
1470 has_features ? "features=" : "",
1471 has_features ? our_feature_string : "",
1472 has_features ? ";" : "",
1473 has_maxdumps ? "maxdumps=" : "",
1474 has_maxdumps ? number : "",
1475 has_maxdumps ? ";" : "",
1476 has_hostname ? "hostname=" : "",
1477 has_hostname ? hostp->hostname : "",
1478 has_hostname ? ";" : "",
1479 has_config ? "config=" : "",
1480 has_config ? get_config_name() : "",
1481 has_config ? ";" : "",
1484 req_len = strlen(req);
1485 req_len += 128; /* room for SECURITY ... */
/* one pass over the disks of this host; only ready, selected disks take part */
1487 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1491 GPtrArray *errarray;
1493 if(dp->todo == 0) continue;
1495 if(est(dp)->state != DISK_READY) continue;
1497 est(dp)->got_estimate = 0;
/* a disk with no level to estimate is finished immediately */
1498 if (est(dp)->estimate[0].level == -1) {
1499 est(dp)->state = DISK_DONE;
1503 qname = quote_string(dp->name);
/* reject disks whose dumptype options do not validate; each problem is logged */
1505 errarray = validate_optionstr(dp);
1506 if (errarray->len > 0) {
1508 for (i=0; i < errarray->len; i++) {
1509 log_add(L_FAIL, _("%s %s %s 0 [%s]"),
1510 dp->host->hostname, qname,
1512 (char *)g_ptr_array_index(errarray, i));
1515 est(dp)->state = DISK_DONE;
1519 b64disk = amxml_format_tag("disk", dp->name);
1520 qdevice = quote_string(dp->device);
1521 estimate = (estimate_t)GPOINTER_TO_INT(dp->estimatelist->data);
1523 b64device = amxml_format_tag("diskdevice", dp->device);
/* scan the estimate list for ES_SERVER */
1525 estimate = ES_CLIENT;
1526 for (el = dp->estimatelist; el != NULL; el = el->next) {
1527 estimate = (estimate_t)GPOINTER_TO_INT(el->data);
1528 if (estimate == ES_SERVER)
/* server-side estimate: fill every requested level from the info record */
1531 if (estimate == ES_SERVER) {
1534 get_info(dp->host->hostname, dp->name, &info);
1535 for(i = 0; i < MAX_LEVELS; i++) {
1536 int lev = est(dp)->estimate[i].level;
1538 if(lev == -1) break;
1539 server_estimate(dp, i, &info, lev);
1541 g_fprintf(stderr,_("%s time %s: got result for host %s disk %s:"),
1542 get_pname(), walltime_str(curclock()),
1543 dp->host->hostname, qname);
1544 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1545 est(dp)->estimate[0].level,
1546 (long long)est(dp)->estimate[0].nsize,
1547 est(dp)->estimate[1].level,
1548 (long long)est(dp)->estimate[1].nsize,
1549 est(dp)->estimate[2].level,
1550 (long long)est(dp)->estimate[2].nsize);
/* a client without fe_xml_estimate gets no request for this disk: it is done now */
1551 if (!am_has_feature(hostp->features, fe_xml_estimate)) {
1552 est(dp)->state = DISK_DONE;
1553 remove_disk(&startq, dp);
1554 enqueue_disk(&estq, dp);
/* scan the estimate list for a client-side method (ES_CLIENT or ES_CALCSIZE) */
1558 estimate = ES_SERVER;
1559 for (el = dp->estimatelist; el != NULL; el = el->next) {
1560 estimate = (estimate_t)GPOINTER_TO_INT(el->data);
1561 if (estimate == ES_CLIENT || estimate == ES_CALCSIZE)
1564 if (estimate == ES_CLIENT ||
1565 estimate == ES_CALCSIZE ||
1566 (am_has_feature(hostp->features, fe_req_xml) &&
1567 am_has_feature(hostp->features, fe_xml_estimate))) {
/* XML request format: one dle element per disk */
1571 if (am_has_feature(hostp->features, fe_req_xml)) {
1572 char *levelstr = NULL;
1573 char *spindlestr = NULL;
1574 char level[NUM_STR_SIZE];
1575 char spindle[NUM_STR_SIZE];
1580 get_info(dp->host->hostname, dp->name, &info);
1581 for(i = 0; i < MAX_LEVELS; i++) {
1583 int lev = est(dp)->estimate[i].level;
1584 if (lev == -1) break;
1585 g_snprintf(level, SIZEOF(level), "%d", lev);
1586 if (am_has_feature(hostp->features, fe_xml_level_server) &&
1587 server_can_do_estimate(dp, &info, lev)) {
1588 server = "<server>YES</server>";
1592 vstrextend(&levelstr, " <level>",
1594 "</level>\n", NULL);
1596 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1597 spindlestr = vstralloc(" <spindle>",
1599 "</spindle>\n", NULL);
1600 o = xml_optionstr(dp, 0);
1602 if (strcmp(dp->program,"DUMP") == 0 ||
1603 strcmp(dp->program,"GNUTAR") == 0) {
1604 l = vstralloc("<dle>\n",
1607 "</program>\n", NULL);
1609 l = vstralloc("<dle>\n",
1610 " <program>APPLICATION</program>\n",
1612 if (dp->application) {
1613 application_t *application;
1616 application = lookup_application(dp->application);
1617 g_assert(application != NULL);
1618 xml_app = xml_application(dp, application,
1620 vstrextend(&l, xml_app, NULL);
1625 es = xml_estimate(dp->estimatelist, hostp->features);
1626 vstrextend(&l, es, "\n", NULL);
1628 vstrextend(&l, " ", b64disk, "\n", NULL);
1630 vstrextend(&l, " ", b64device, "\n", NULL);
1631 vstrextend(&l, levelstr, spindlestr, o, "</dle>\n", NULL);
/* without XML support only DUMP and GNUTAR can be requested */
1638 } else if (strcmp(dp->program,"DUMP") != 0 &&
1639 strcmp(dp->program,"GNUTAR") != 0) {
1640 est(dp)->errstr = newvstrallocf(est(dp)->errstr,
1641 _("does not support application-api"));
/* older line format: one request line per level */
1643 for(i = 0; i < MAX_LEVELS; i++) {
1645 char *exclude1 = "";
1646 char *exclude2 = "";
1647 char *excludefree = NULL;
1648 char *include1 = "";
1649 char *include2 = "";
1650 char *includefree = NULL;
1651 char spindle[NUM_STR_SIZE];
1652 char level[NUM_STR_SIZE];
1653 int lev = est(dp)->estimate[i].level;
1655 if(lev == -1) break;
1657 g_snprintf(level, SIZEOF(level), "%d", lev);
1658 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1659 if (am_has_feature(hostp->features,
1660 fe_sendsize_req_options)){
1661 exclude1 = " OPTIONS |";
1662 exclude2 = optionstr(dp);
1663 if ( exclude2 == NULL ) {
1664 error(_("problem with option string, check the dumptype definition.\n"));
1666 excludefree = exclude2;
/* only a single exclude or include entry is passed along in this format */
1669 if (dp->exclude_file &&
1670 dp->exclude_file->nb_element == 1) {
1671 exclude1 = " exclude-file=";
1672 exclude2 = quote_string(
1673 dp->exclude_file->first->name);
1674 excludefree = exclude2;
1676 else if (dp->exclude_list &&
1677 dp->exclude_list->nb_element == 1) {
1678 exclude1 = " exclude-list=";
1679 exclude2 = quote_string(
1680 dp->exclude_list->first->name);
1681 excludefree = exclude2;
1683 if (dp->include_file &&
1684 dp->include_file->nb_element == 1) {
1685 include1 = " include-file=";
1686 include2 = quote_string(
1687 dp->include_file->first->name);
1688 includefree = include2;
1690 else if (dp->include_list &&
1691 dp->include_list->nb_element == 1) {
1692 include1 = " include-list=";
1693 include2 = quote_string(
1694 dp->include_list->first->name);
1695 includefree = include2;
/* fall back to a CLIENT estimate when the client cannot do CALCSIZE */
1699 if (estimate == ES_CALCSIZE &&
1700 !am_has_feature(hostp->features,
1701 fe_calcsize_estimate)) {
1703 _("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1704 hostp->hostname, qname);
1705 estimate = ES_CLIENT;
1707 if(estimate == ES_CLIENT)
1710 calcsize = "CALCSIZE ";
1712 l = vstralloc(calcsize,
1715 " ", dp->device ? qdevice : "",
1717 " ", est(dp)->estimate[i].dumpdate,
1719 " ", exclude1, exclude2,
1720 ((includefree != NULL) ? " " : ""),
1727 amfree(includefree);
1728 amfree(excludefree);
/*
 * Queue bookkeeping: a disk already marked DISK_DONE is taken off estq and
 * parked on pestq as partially done; any other disk leaves startq and
 * becomes DISK_ACTIVE.
 */
1736 if (est(dp)->state == DISK_DONE) {
1737 remove_disk(&estq, dp);
1738 est(dp)->state = DISK_PARTIALY_DONE;
1739 enqueue_disk(&pestq, dp);
1741 remove_disk(&startq, dp);
1742 est(dp)->state = DISK_ACTIVE;
/* the request could not be built: fail the disk unless it is already done */
1744 } else if (est(dp)->state != DISK_DONE) {
1745 remove_disk(&startq, dp);
1746 est(dp)->state = DISK_DONE;
1747 if (est(dp)->errstr == NULL) {
1748 est(dp)->errstr = vstrallocf(
1749 _("Can't request estimate"));
1751 enqueue_disk(&failq, dp);
/* nothing was requested from this host: it is finished */
1760 if(estimates == 0) {
1762 hostp->up = HOST_DONE;
/* a negative etimeout is a total for the request, a positive one is per estimate */
1766 if (conf_etimeout < 0) {
1767 timeout = - conf_etimeout;
1769 timeout = estimates * conf_etimeout;
1771 } else { /* noop service */
1772 req = vstralloc("SERVICE ", "noop", "\n",
1774 "features=", our_feature_string, ";",
1778 * We use ctimeout for the "noop" request because it should be
1779 * very fast and etimeout has other side effects.
1781 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
1784 dbprintf(_("send request:\n----\n%s\n----\n\n"), req);
/* the security driver is chosen from the auth setting of the first disk of the host */
1785 secdrv = security_getdriver(hostp->disks->auth);
1786 if (secdrv == NULL) {
1787 hostp->up = HOST_DONE;
1789 _("Could not find security driver '%s' for host '%s'"),
1790 hostp->disks->auth, hostp->hostname);
1794 hostp->up = HOST_ACTIVE;
/* every disk with a pending request waits on waitq until the reply arrives */
1796 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1800 if(est(dp)->state == DISK_ACTIVE) {
1801 est(dp)->errstr = NULL;
1802 enqueue_disk(&waitq, dp);
1806 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1807 req, timeout, handle_result, hostp);
/*
 * lookup_hostdisk: walk the disk list of host hp (linked through hostnext)
 * and return the first disk whose name compares equal to str.
 * The value returned when no disk matches is not visible in this extract.
 */
1812 static disk_t *lookup_hostdisk(
1813 /*@keep@*/ am_host_t *hp,
1818 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1819 if(strcmp(str, dp->name) == 0) return dp;
/*
 * handle_result: reply callback handed to protocol_sendreq() by getsize().
 * datap is the am_host_t the request was sent for, pkt is the reply packet
 * and sech is the security handle it arrived on.
 *
 * Visible flow:
 *  - the host is put back to HOST_READY;
 *  - a failed request, or a NAK other than the two replies meaning that the
 *    noop service is unknown, produces an errbuf that the error path at the
 *    end of the function copies into the errstr of the affected disks;
 *  - an OPTIONS line carrying features= replaces hostp->features;
 *  - every other reply line is parsed as a quoted disk name, a level and
 *    then either SIZE n or an ERROR/WARNING message, and is recorded in the
 *    matching est(dp)->estimate[] slot;
 *  - a host that sent no feature list gets the default feature set;
 *  - each disk that was DISK_ACTIVE or DISK_PARTIALY_DONE is taken off
 *    waitq or pestq and moved to pestq (P_PREP reply) or to estq or failq
 *    (P_REP reply), and the post-estimate scripts are run.
 *
 * Several statements, including the labels of the error paths, are missing
 * from this extract, so the description covers only what is visible.
 */
1825 static void handle_result(
1828 security_handle_t *sech)
1834 char *msg, msg_undo;
1835 char *remoterr, *errbuf = NULL;
1846 hostp = (am_host_t *)datap;
1847 hostp->up = HOST_READY;
/* a reply timeout gets its own message; other failures report the security error text */
1850 if (strcmp(security_geterror(sech), "timeout waiting for REP") == 0) {
1851 errbuf = vstrallocf("Some estimate timeout on %s, using server estimate if possible", hostp->hostname);
1853 errbuf = vstrallocf(_("Request to %s failed: %s"),
1854 hostp->hostname, security_geterror(sech));
1858 if (pkt->type == P_NAK) {
1860 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1863 goto NAK_parse_failed;
1865 skip_whitespace(s, ch);
1866 if(ch == '\0') goto NAK_parse_failed;
1868 if((s = strchr(remoterr, '\n')) != NULL) {
1869 if(s == remoterr) goto NAK_parse_failed;
/* a NAK that only says the noop service is unknown is not treated as an error */
1872 if (strcmp(remoterr, "unknown service: noop") != 0
1873 && strcmp(remoterr, "noop: invalid service") != 0) {
1874 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
1880 dbprintf(_("got reply:\n----\n%s\n----\n\n"), pkt->body);
1886 if(strncmp_const(line, "OPTIONS ") == 0) {
1887 t = strstr(line, "features=");
1888 if(t != NULL && (g_ascii_isspace((int)t[-1]) || t[-1] == ';')) {
1889 char *u = strchr(t, ';');
1892 t += SIZEOF("features=")-1;
/* release the previous feature set before parsing the new one */
1893 am_release_feature_set(hostp->features);
1894 if((hostp->features = am_string_to_feature(t)) == NULL) {
/*
 * NOTE(review): every other vstrallocf() call in this function passes the
 * format string first.  Here the hostname is in the format position and
 * the real format is passed as an argument -- looks like a bug; confirm.
 */
1895 errbuf = vstrallocf(hostp->hostname,
1896 _(": bad features value: %s\n"), line);
1902 skip_quoted_line(s, ch);
1907 if ((strncmp_const_skip(t, "ERROR ", t, tch) == 0) ||
1908 (strncmp_const_skip(t, "WARNING ", t, tch) == 0)) {
1910 skip_whitespace(t, tch);
1916 * If the "error" is that the "noop" service is unknown, it
1917 * just means the client is "old" (does not support the servie).
1918 * We can ignore this.
1920 if(hostp->features == NULL
1921 && pkt->type == P_NAK
1922 && (strcmp(t - 1, "unknown service: noop") == 0
1923 || strcmp(t - 1, "noop: invalid service") == 0)) {
1924 skip_quoted_line(s, ch);
1928 if (t) /* truncate after the first line */
1930 errbuf = vstralloc(hostp->hostname,
1931 (pkt->type == P_NAK) ? "NAK " : "",
/* estimate line: quoted disk name first, then the level */
1940 skip_quoted_string(t, tch);
1942 disk = unquote_string(msg);
1944 skip_whitespace(t, tch);
1946 if (sscanf(t - 1, "%d", &level) != 1) {
1950 skip_integer(t, tch);
1951 skip_whitespace(t, tch);
1953 dp = lookup_hostdisk(hostp, disk);
1955 log_add(L_ERROR, _("%s: invalid reply from sendsize: `%s'\n"),
1956 hostp->hostname, line);
1961 if (strncmp_const(t-1,"SIZE ") == 0) {
1962 if (sscanf(t - 1, "SIZE %lld", &size_) != 1) {
1965 size = (gint64)size_;
1966 } else if ((strncmp_const(t-1,"ERROR ") == 0) ||
1967 (strncmp_const(t-1,"WARNING ") == 0)) {
1968 skip_non_whitespace(t, tch);
1969 skip_whitespace(t, tch);
1971 skip_quoted_string(t,tch);
/* only the first message of a final reply is kept as the error string of the disk */
1974 if (pkt->type == P_REP && !est(dp)->errstr) {
1975 est(dp)->errstr = unquote_string(msg);
/* store the size in the slot of the reported level: -2 clears it, 0 or more replaces it */
1984 for (i = 0; i < MAX_LEVELS; i++) {
1985 if (est(dp)->estimate[i].level == level) {
1986 if (size == (gint64)-2) {
1987 est(dp)->estimate[i].nsize = -1; /* remove estimate */
1988 est(dp)->estimate[i].guessed = 0;
1989 } else if (size > (gint64)-1) {
1990 /* take the size returned by the client */
1991 est(dp)->estimate[i].nsize = size;
1992 est(dp)->estimate[i].guessed = 0;
1997 if (i == MAX_LEVELS && level > 0) {
1998 /* client always report level 0 for some error */
1999 goto bad_msg; /* this est wasn't requested */
2001 est(dp)->got_estimate++;
2005 skip_quoted_line(s, ch);
/* no feature list arrived from this host: fall back to the default feature set */
2008 if(hostp->up == HOST_READY && hostp->features == NULL) {
2010 * The client does not support the features list, so give it an
2013 dbprintf(_("no feature set from host %s\n"), hostp->hostname);
2014 hostp->features = am_set_default_feature_set();
2017 security_close_connection(sech, hostp->hostname);
2019 /* XXX what about disks that only got some estimates... do we care? */
2020 /* XXX amanda 2.1 treated that case as a bad msg */
/* settle every disk of this host that was waiting on this reply */
2022 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2023 if(dp->todo == 0) continue;
2024 if(est(dp)->state != DISK_ACTIVE &&
2025 est(dp)->state != DISK_PARTIALY_DONE) continue;
2027 if(est(dp)->state == DISK_ACTIVE) {
2028 remove_disk(&waitq, dp);
2030 else if(est(dp)->state == DISK_PARTIALY_DONE) {
2031 remove_disk(&pestq, dp);
/* a final reply (P_REP) finishes the disk, a partial one (P_PREP) keeps it pending */
2034 if(pkt->type == P_REP) {
2035 est(dp)->state = DISK_DONE;
2037 else if(pkt->type == P_PREP) {
2038 est(dp)->state = DISK_PARTIALY_DONE;
2041 if (est(dp)->estimate[0].level == -1) continue; /* ignore this disk */
2044 qname = quote_string(dp->name);
2045 if(pkt->type == P_PREP) {
2046 g_fprintf(stderr,_("%s: time %s: got partial result for host %s disk %s:"),
2047 get_pname(), walltime_str(curclock()),
2048 dp->host->hostname, qname);
2049 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
2050 est(dp)->estimate[0].level,
2051 (long long)est(dp)->estimate[0].nsize,
2052 est(dp)->estimate[1].level,
2053 (long long)est(dp)->estimate[1].nsize,
2054 est(dp)->estimate[2].level,
2055 (long long)est(dp)->estimate[2].nsize);
2056 enqueue_disk(&pestq, dp);
2058 else if(pkt->type == P_REP) {
2059 g_fprintf(stderr,_("%s: time %s: got result for host %s disk %s:"),
2060 get_pname(), walltime_str(curclock()),
2061 dp->host->hostname, qname);
2062 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
2063 est(dp)->estimate[0].level,
2064 (long long)est(dp)->estimate[0].nsize,
2065 est(dp)->estimate[1].level,
2066 (long long)est(dp)->estimate[1].nsize,
2067 est(dp)->estimate[2].level,
2068 (long long)est(dp)->estimate[2].nsize);
/* at least one level with a positive size is enough to schedule the disk */
2069 if ((est(dp)->estimate[0].level != -1 &&
2070 est(dp)->estimate[0].nsize > (gint64)0) ||
2071 (est(dp)->estimate[1].level != -1 &&
2072 est(dp)->estimate[1].nsize > (gint64)0) ||
2073 (est(dp)->estimate[2].level != -1 &&
2074 est(dp)->estimate[2].nsize > (gint64)0)) {
/* levels that still have a negative size are disabled before the disk goes to estq */
2076 for (i=MAX_LEVELS-1; i >=0; i--) {
2077 if (est(dp)->estimate[i].level != -1 &&
2078 est(dp)->estimate[i].nsize < (gint64)0) {
2079 est(dp)->estimate[i].level = -1;
2082 enqueue_disk(&estq, dp);
2085 enqueue_disk(&failq, dp);
2086 if(est(dp)->got_estimate && !est(dp)->errstr) {
2087 est(dp)->errstr = vstrallocf("disk %s, all estimate failed",
2092 _("error result for host %s disk %s: missing estimate\n"),
2093 dp->host->hostname, qname);
2094 if (est(dp)->errstr == NULL) {
2095 est(dp)->errstr = vstrallocf(_("missing result for %s in %s response"),
2096 qname, dp->host->hostname);
2100 hostp->up = HOST_DONE;
/*
 * The post-DLE-estimate script runs once per disk (guarded by post_dle):
 * on a final reply, or as soon as every requested level has a positive
 * size.
 */
2102 if (est(dp)->post_dle == 0 &&
2103 (pkt->type == P_REP ||
2104 ((est(dp)->estimate[0].level == -1 ||
2105 est(dp)->estimate[0].nsize > (gint64)0) &&
2106 (est(dp)->estimate[1].level == -1 ||
2107 est(dp)->estimate[1].nsize > (gint64)0) &&
2108 (est(dp)->estimate[2].level == -1 ||
2109 est(dp)->estimate[2].nsize > (gint64)0)))) {
2110 run_server_dle_scripts(EXECUTE_ON_POST_DLE_ESTIMATE,
2111 get_config_name(), dp,
2112 est(dp)->estimate[0].level);
2113 est(dp)->post_dle = 1;
2118 if(hostp->up == HOST_DONE) {
2119 if (pkt->type == P_REP) {
2120 run_server_host_scripts(EXECUTE_ON_POST_HOST_ESTIMATE,
2121 get_config_name(), hostp);
2126 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2128 while(waitpid(-1, NULL, WNOHANG)> 0);
2133 errbuf = vstrallocf(_("%s NAK: [NAK parse failed]"), hostp->hostname);
2134 g_fprintf(stderr, _("got strange nak from %s:\n----\n%s----\n\n"),
2135 hostp->hostname, pkt->body);
2139 g_fprintf(stderr,_("got a bad message, stopped at:\n"));
2141 g_fprintf(stderr,_("----\n%s----\n\n"), line);
2142 errbuf = stralloc2(_("badly formatted response from "), hostp->hostname);
2147 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2149 if(est(dp)->state == DISK_ACTIVE) {
2150 qname = quote_string(dp->name);
2151 est(dp)->state = DISK_DONE;
2152 remove_disk(&waitq, dp);
2153 enqueue_disk(&failq, dp);
2156 est(dp)->errstr = stralloc(errbuf);
2157 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
2158 dp->host->hostname, qname, errbuf);
2165 * If there were no disks involved, make sure the error gets
2168 log_add(L_ERROR, "%s", errbuf);
2169 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2171 qname = quote_string(dp->name);
2172 est(dp)->state = DISK_DONE;
2173 remove_disk(&waitq, dp);
2174 enqueue_disk(&failq, dp);
2176 est(dp)->errstr = g_strdup(errbuf);
2177 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
2178 dp->host->hostname, qname, errbuf);
2183 hostp->up = HOST_DONE;
2185 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2187 while(waitpid(-1, NULL, WNOHANG)> 0);
2194 * ========================================================================
/* ordering callback passed to insert_disk() when a disk is added to schedq */
2199 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
/* choose which incremental estimate of dp to use */
2200 static one_est_t *pick_inclevel(disk_t *dp);
/*
 * analyze_estimate: turn the estimates gathered for dp into a scheduling
 * decision and insert the disk into schedq.
 *
 * Visible flow:
 *  - ep->dump_est is the level 0 estimate when a full is due
 *    (ep->next_level0 <= 0), or when the last dump was a level 0 and the
 *    info record carries FORCE_NO_BUMP; if that estimate has no usable
 *    csize an incremental is picked instead.  In all other cases an
 *    incremental is picked with pick_inclevel(), falling back to the last
 *    level, the level after it and finally level 0 while csize stays -1;
 *  - ep->degr_est is the estimate to use in degraded mode, or
 *    default_one_est with an explanation in ep->degr_mesg when degraded
 *    mode is not possible;
 *  - when no estimate is usable (dump_est->level < 0) the slots are dumped
 *    to stderr and a warning is logged;
 *  - total_size, total_lev0 and balanced_size are updated;
 *  - est(dp)->errstr, if set, is logged as L_WARNING when any slot has a
 *    positive nsize and as L_FAIL otherwise.
 *
 * NOTE(review): the declaration of ep is not visible in this extract;
 * presumably it is est(dp) -- confirm.
 */
2202 static void analyze_estimate(
2208 char *qname = quote_string(dp->name);
2212 g_fprintf(stderr, _("pondering %s:%s... "),
2213 dp->host->hostname, qname);
2214 g_fprintf(stderr, _("next_level0 %d last_level %d "),
2215 ep->next_level0, ep->last_level);
2217 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
2221 ep->degr_est = &default_one_est;
/* full dump branch: due now, or forced by FORCE_NO_BUMP right after a level 0 */
2223 if (ep->next_level0 <= 0 || (have_info && ep->last_level == 0
2224 && (ISSET(info.command, FORCE_NO_BUMP)))) {
2225 if (ep->next_level0 <= 0) {
2226 g_fprintf(stderr,_("(due for level 0) "));
2228 ep->dump_est = est_for_level(dp, 0);
2229 if (ep->dump_est->csize <= (gint64)0) {
2231 _("(no estimate for level 0, picking an incr level)\n"));
2232 ep->dump_est = pick_inclevel(dp);
2234 if (ep->dump_est->nsize == (gint64)-1) {
2235 ep->dump_est = est_for_level(dp, ep->dump_est->level + 1);
2239 total_lev0 += (double) ep->dump_est->csize;
/* a new disk or a skip-incr disk has no incremental to fall back on */
2240 if(ep->last_level == -1 || dp->skip_incr) {
2241 g_fprintf(stderr,_("(%s disk, can't switch to degraded mode)\n"),
2242 dp->skip_incr? "skip-incr":_("new"));
2243 if (dp->skip_incr && ep->degr_mesg == NULL) {
/* NOTE(review): the message below is spelled Skpping; fixing it changes a runtime string */
2244 ep->degr_mesg = _("Skpping: skip-incr disk can't be dumped in degraded mode");
2246 ep->degr_est = &default_one_est;
2249 /* fill in degraded mode info */
2250 g_fprintf(stderr,_("(picking inclevel for degraded mode)"));
2251 ep->degr_est = pick_inclevel(dp);
2252 if (ep->degr_est->level >= 0 &&
2253 ep->degr_est->csize == (gint64)-1) {
2254 ep->degr_est = est_for_level(dp, ep->degr_est->level + 1);
2256 if (ep->degr_est->csize == (gint64)-1) {
2257 g_fprintf(stderr,_("(no inc estimate)"));
2258 if (ep->degr_mesg == NULL)
2259 ep->degr_mesg = _("Skipping: an incremental estimate could not be performed, so disk cannot be dumped in degraded mode");
2260 ep->degr_est = &default_one_est;
2262 g_fprintf(stderr,"\n");
/* incremental branch: try progressively less preferred levels until one has a size */
2267 g_fprintf(stderr,_("(not due for a full dump, picking an incr level)\n"));
2268 /* XXX - if this returns -1 may be we should force a total? */
2269 ep->dump_est = pick_inclevel(dp);
2271 if (ep->dump_est->csize == (gint64)-1) {
2272 ep->dump_est = est_for_level(dp, ep->last_level);
2274 if (ep->dump_est->csize == (gint64)-1) {
2275 ep->dump_est = est_for_level(dp, ep->last_level + 1);
2277 if (ep->dump_est->csize == (gint64)-1) {
2278 ep->dump_est = est_for_level(dp, 0);
2280 if (ep->degr_mesg == NULL) {
2281 ep->degr_mesg = _("Skipping: a full is not planned, so can't dump in degraded mode");
/* a negative level means only the placeholder estimate is left: nothing to schedule */
2285 if (ep->dump_est->level < 0) {
2287 char *q = quote_string("no estimate");
2289 g_fprintf(stderr,_(" no valid estimate\n"));
2290 for(i=0; i<MAX_LEVELS; i++) {
2291 if (est(dp)->estimate[i].level >= 0) {
2292 g_fprintf(stderr,(" level: %d nsize: %lld csize: %lld\n"),
2293 est(dp)->estimate[i].level,
2294 (long long)est(dp)->estimate[i].nsize,
2295 (long long)est(dp)->estimate[i].csize);
2298 log_add(L_WARNING, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2299 planner_timestamp, q);
2303 g_fprintf(stderr,_(" curr level %d nsize %lld csize %lld "),
2304 ep->dump_est->level, (long long)ep->dump_est->nsize,
2305 (long long)ep->dump_est->csize);
2307 insert_disk(&schedq, dp, schedule_order);
/* each scheduled dump also costs one tape block and one tape mark */
2309 total_size += (gint64)tt_blocksize_kb + ep->dump_est->csize + tape_mark;
2311 /* update the balanced size */
2312 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2313 dp->strategy == DS_INCRONLY)) {
2316 lev0size = est_tape_size(dp, 0);
2317 if(lev0size == (gint64)-1) lev0size = ep->last_lev0size;
2319 if (dp->strategy == DS_NOINC) {
2320 balanced_size += (double)lev0size;
2321 } else if (dp->dumpcycle == 0) {
2322 balanced_size += (double)(lev0size * conf_dumpcycle / (gint64)runs_per_cycle);
2323 } else if (dp->dumpcycle != conf_dumpcycle) {
/* NOTE(review): conf_dumpcycle / dp->dumpcycle looks like integer division -- confirm it is intended */
2324 balanced_size += (double)(lev0size * (conf_dumpcycle / dp->dumpcycle) / (gint64)runs_per_cycle);
2326 balanced_size += (double)(lev0size / (gint64)runs_per_cycle);
2330 g_fprintf(stderr,_("total size %lld total_lev0 %1.0lf balanced-lev0size %1.0lf\n"),
2331 (long long)total_size, total_lev0, balanced_size);
2333 /* Log errstr even if the estimate succeeded */
2334 /* It can be an error from a script */
2335 if (est(dp)->errstr) {
2336 char *qerrstr = quote_string(est(dp)->errstr);
2337 /* Log only a warning if a server estimate is available */
2338 if (est(dp)->estimate[0].nsize > 0 ||
2339 est(dp)->estimate[1].nsize > 0 ||
2340 est(dp)->estimate[2].nsize > 0) {
2341 log_add(L_WARNING, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2342 planner_timestamp, qerrstr);
2344 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2345 planner_timestamp, qerrstr);
/*
 * handle_failed: report a disk whose estimate failed.
 *
 * The error text is est(dp)->errstr, or a fixed placeholder when none was
 * recorded.  It is wrapped in square brackets, quoted, printed to stderr as
 * a FAILED line and logged with L_FAIL.
 */
2353 static void handle_failed(
2356 char *errstr, *errstr1, *qerrstr;
2357 char *qname = quote_string(dp->name);
2359 errstr = est(dp)->errstr? est(dp)->errstr : _("hmm, no error indicator!");
2360 errstr1 = vstralloc("[",errstr,"]", NULL);
2361 qerrstr = quote_string(errstr1);
2364 g_fprintf(stderr, _("%s: FAILED %s %s %s 0 %s\n"),
2365 get_pname(), dp->host->hostname, qname, planner_timestamp, qerrstr);
2367 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2368 planner_timestamp, qerrstr);
2372 /* XXX - memory leak with *dp */
2377 * insert-sort by decreasing priority, then
2378 * by decreasing size within priority levels.
/*
 * schedule_order: comparison callback handed to insert_disk() for schedq.
 *
 * Disks are compared first by dump_priority (b minus a, so a higher priority
 * in a gives a negative result) and then by the csize of the chosen dump
 * estimate, where the gint64 difference is reduced to -1 or 1 so that it
 * fits the int return type.  The value returned for equal sizes is not
 * visible in this extract.
 */
2381 static int schedule_order(
2388 diff = est(b)->dump_priority - est(a)->dump_priority;
2389 if(diff != 0) return diff;
2391 ldiff = est(b)->dump_est->csize - est(a)->dump_est->csize;
2392 if(ldiff < (gint64)0) return -1; /* XXX - there has to be a better way to do this */
2393 if(ldiff > (gint64)0) return 1;
/*
 * pick_inclevel: choose the incremental estimate to use for dp.
 *
 * Visible decision order:
 *  1. last dump was a level 0: use level 1;
 *  2. strategy DS_NOFULL: always level 1;
 *  3. no estimate for the last level (nsize == -1): use the next level if it
 *     has a positive size, otherwise report that no incremental is possible;
 *  4. stay at the last level when it is already DUMP_LEVELS - 1, when fewer
 *     than dp->bumpdays days were spent at it, or when its size does not
 *     exceed the threshold from bump_thresh();
 *  5. otherwise look at the next level: with no estimate for it, or with a
 *     saving below the threshold, the dump is not bumped; else the bump is
 *     logged with L_INFO.
 *
 * The return statements of steps 3 to 5 are not visible in this extract.
 */
2398 static one_est_t *pick_inclevel(
2401 one_est_t *level0_est, *base_est, *bump_est;
2405 level0_est = est_for_level(dp, 0);
2406 base_est = est_for_level(dp, est(dp)->last_level);
2408 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2409 if (base_est->level == 0) {
2410 g_fprintf(stderr,_(" picklev: last night 0, so tonight level 1\n"));
2411 return est_for_level(dp, 1);
2414 /* if no-full option set, always do level 1 */
2415 if(dp->strategy == DS_NOFULL) {
2416 g_fprintf(stderr,_(" picklev: no-full set, so always level 1\n"));
2417 return est_for_level(dp, 1);
2420 /* if we didn't get an estimate, we can't do an inc */
2421 if (base_est->nsize == (gint64)-1) {
2422 bump_est = est_for_level(dp, est(dp)->last_level + 1);
2423 if (bump_est->nsize > (gint64)0) { /* FORCE_BUMP */
2424 g_fprintf(stderr,_(" picklev: bumping to level %d\n"), bump_est->level);
2427 g_fprintf(stderr,_(" picklev: no estimate for level %d, so no incs\n"), base_est->level);
/* the threshold depends on the level 0 size and the bump settings of the disk */
2431 thresh = bump_thresh(base_est->level, level0_est->nsize, dp->bumppercent,
2432 dp->bumpsize, dp->bumpmult);
2435 _(" pick: size %lld level %d days %d (thresh %lldK, %d days)\n"),
2436 (long long)base_est->nsize, base_est->level, est(dp)->level_days,
2437 (long long)thresh, dp->bumpdays);
/* conditions under which the dump stays at the current level */
2439 if(base_est->level == (DUMP_LEVELS - 1)
2440 || est(dp)->level_days < dp->bumpdays
2441 || base_est->nsize <= thresh)
2444 bump_est = est_for_level(dp, base_est->level + 1);
2446 if (bump_est->nsize == (gint64)-1)
2449 g_fprintf(stderr, _(" pick: next size %lld... "),
2450 (long long)bump_est->nsize);
/* bumping must save at least thresh compared with the current level */
2452 if (base_est->nsize - bump_est->nsize < thresh) {
2453 g_fprintf(stderr, _("not bumped\n"));
2457 qname = quote_string(dp->name);
2458 g_fprintf(stderr, _("BUMPED\n"));
2459 log_add(L_INFO, _("Incremental of %s:%s bumped to level %d."),
2460 dp->host->hostname, qname, bump_est->level);
2470 ** ========================================================================
2473 ** We have two strategies here:
2477 ** If we are trying to fit too much on the tape something has to go. We
2478 ** try to delay totals until tomorrow by converting them into incrementals
2479 ** and, if that is not effective enough, dropping incrementals altogether.
2480 ** While we are searching for the guilty dump (the one that is really
2481 ** causing the schedule to be oversize) we have probably trampled on a lot of
2482 ** innocent dumps, so we maintain a "before image" list and use this to
2483 ** put back what we can.
2485 ** 2. Promote dumps.
2487 ** We try to keep the amount of tape used by total dumps the same each night.
2488 ** If there is some spare tape in this run we have a look to see if any of
2489 ** tonight's incrementals could be promoted to totals and leave us with a
2490 ** more balanced cycle.
/* variadic: delay_dumps() passes message strings followed by a NULL terminator */
2493 static void delay_one_dump(disk_t *dp, int delete, ...);
/* the two promotion helpers are only declared here; NOTE(review): see their definitions for the meaning of the int result */
2494 static int promote_highest_priority_incremental(void);
2495 static int promote_hills(void);
2497 /* delay any dumps that will not fit */
2498 static void delay_dumps(void)
2503 disk_t * delayed_dp;
2506 gint64 new_total; /* New total_size */
2507 char est_kb[20]; /* Text formatted dump size */
2508 int nb_forced_level_0;
2516 biq.head = biq.tail = NULL;
2519 ** 1. Delay dumps that are way oversize.
2521 ** Dumps larger than the size of the tapes we are using are just plain
2522 ** not going to fit no matter how many other dumps we drop. Delay
2523 ** oversize totals until tomorrow (by which time my owner will have
2524 ** resolved the problem!) and drop incrementals altogether. Naturally
2525 ** a large total might be delayed into a large incremental so these
2526 ** need to be checked for separately.
2529 for(dp = schedq.head; dp != NULL; dp = ndp) {
2530 int avail_tapes = 1;
2531 if (dp->splitsize > (gint64)0 || dp->allow_split)
2532 avail_tapes = conf_runtapes;
2534 ndp = dp->next; /* remove_disk zaps this */
2536 full_size = est_tape_size(dp, 0);
2537 if (full_size > tapetype_get_length(tape) * (gint64)avail_tapes) {
2538 char *qname = quote_string(dp->name);
2539 if (conf_runtapes > 1 && dp->splitsize == (gint64)0) {
2540 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"
2541 ", you could define a splitsize"),
2542 dp->host->hostname, qname,
2543 (long long)full_size);
2545 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"),
2546 dp->host->hostname, qname,
2547 (long long)full_size);
2552 if (est(dp)->dump_est->csize == (gint64)-1 ||
2553 est(dp)->dump_est->csize <= tapetype_get_length(tape) * (gint64)avail_tapes) {
2557 /* Format dumpsize for messages */
2558 g_snprintf(est_kb, 20, "%lld KB,",
2559 (long long)est(dp)->dump_est->csize);
2561 if(est(dp)->dump_est->level == 0) {
2564 message = _("but cannot incremental dump skip-incr disk");
2566 else if(est(dp)->last_level < 0) {
2568 message = _("but cannot incremental dump new disk");
2570 else if(est(dp)->degr_est->level < 0) {
2572 message = _("but no incremental estimate");
2574 else if (est(dp)->degr_est->csize > tapetype_get_length(tape)) {
2576 message = _("incremental dump also larger than tape");
2580 message = _("full dump delayed, doing incremental");
2585 message = _("skipping incremental");
2587 delay_one_dump(dp, delete, _("dump larger than available tape space,"),
2588 est_kb, message, NULL);
2592 ** 2. Delay total dumps.
2594 ** Delay total dumps until tomorrow (or the day after!). We start with
2595 ** the lowest priority (most dispensable) and work forwards. We take
2596 ** care not to delay *all* the dumps since this could lead to a stale
2597 ** mate [for any one disk there are only three ways tomorrows dump will
2598 ** be smaller than todays: 1. we do a level 0 today so tomorows dump
2599 ** will be a level 1; 2. the disk gets more data so that it is bumped
2600 ** tomorrow (this can be a slow process); and, 3. the disk looses some
2601 ** data (when does that ever happen?)].
2604 nb_forced_level_0 = 0;
2606 timestamps = 2147483647;
2608 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2609 if (est(dp)->dump_est->level == 0) {
2611 est(dp)->dump_priority > priority ||
2612 (est(dp)->dump_priority == priority &&
2613 est(dp)->info->inf[0].date < timestamps)) {
2614 priority = est(dp)->dump_priority;
2615 timestamps = est(dp)->info->inf[0].date;
2621 /* 2.a. Do not delay forced full */
2626 for(dp = schedq.tail;
2627 dp != NULL && total_size > tape_length;
2631 if(est(dp)->dump_est->level != 0) continue;
2633 get_info(dp->host->hostname, dp->name, &info);
2634 if(ISSET(info.command, FORCE_FULL)) {
2635 nb_forced_level_0 += 1;
2640 if (dp != preserve &&
2641 est(dp)->info->inf[0].date > timestamps) {
2643 timestamps = est(dp)->info->inf[0].date;
2647 /* Format dumpsize for messages */
2648 g_snprintf(est_kb, 20, "%lld KB,",
2649 (long long)est(delayed_dp)->dump_est->csize);
2651 if(delayed_dp->skip_incr) {
2653 message = _("but cannot incremental dump skip-incr disk");
2655 else if(est(delayed_dp)->last_level < 0) {
2657 message = _("but cannot incremental dump new disk");
2659 else if(est(delayed_dp)->degr_est->level < 0) {
2661 message = _("but no incremental estimate");
2665 message = _("full dump delayed, doing incremental");
2667 delay_one_dump(delayed_dp, delete, _("dumps too big,"), est_kb,
2670 } while (delayed_dp);
2672 /* 2.b. Delay forced full if needed */
2673 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2674 for(dp = schedq.tail;
2675 dp != NULL && total_size > tape_length;
2679 if(est(dp)->dump_est->level == 0 && dp != preserve) {
2681 /* Format dumpsize for messages */
2682 g_snprintf(est_kb, 20, "%lld KB,",
2683 (long long)est(dp)->dump_est->csize);
2687 message = _("but cannot incremental dump skip-incr disk");
2689 else if(est(dp)->last_level < 0) {
2691 message = _("but cannot incremental dump new disk");
2693 else if(est(dp)->degr_est->level < 0) {
2695 message = _("but no incremental estimate");
2699 message = _("full dump delayed");
2701 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2708 ** 3. Delay incremental dumps.
2710 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2711 ** at making things fit. Again, we start with the lowest priority (most
2712 ** dispensable) and work forwards.
2715 for(dp = schedq.tail;
2716 dp != NULL && total_size > tape_length;
2720 if(est(dp)->dump_est->level != 0) {
2722 /* Format dumpsize for messages */
2723 g_snprintf(est_kb, 20, "%lld KB,",
2724 (long long)est(dp)->dump_est->csize);
2726 delay_one_dump(dp, 1,
2727 _("dumps way too big,"),
2729 _("must skip incremental dumps"),
2735 ** 4. Reinstate delayed dumps.
2737 ** We might not have needed to stomp on all of the dumps we have just
2738 ** delayed above. Try to reinstate them all starting with the last one
2739 ** and working forwards. It is unlikely that the last one will fit back
2740 ** in but why complicate the code?
2743 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2744 int avail_tapes = 1;
2747 if(dp->splitsize > (gint64)0)
2748 avail_tapes = conf_runtapes;
2751 new_total = total_size + (gint64)tt_blocksize_kb +
2752 bi->csize + (gint64)tape_mark;
2754 new_total = total_size - est(dp)->dump_est->csize + bi->csize;
2756 if((new_total <= tape_length) &&
2757 (bi->csize < (tapetype_get_length(tape) * (gint64)avail_tapes))) {
2759 total_size = new_total;
2761 if(bi->level == 0) {
2762 total_lev0 += (double) bi->csize;
2764 insert_disk(&schedq, dp, schedule_order);
2767 est(dp)->dump_est = est_for_level(dp, bi->level);
2771 if(bi->next == NULL)
2772 biq.tail = bi->prev;
2774 (bi->next)->prev = bi->prev;
2775 if(bi->prev == NULL)
2776 biq.head = bi->next;
2778 (bi->prev)->next = bi->next;
2786 ** 5. Output messages about what we have done.
2788 ** We can't output messages while we are delaying dumps because we might
2789 ** reinstate them later. We remember all the messages and output them
2793 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2796 g_fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2797 log_add(L_FAIL, "%s", bi->errstr);
2801 g_fprintf(stderr, _(" delay: %s now at level %d\n"),
2802 bi->errstr, est(dp)->dump_est->level);
2803 log_add(L_INFO, "%s", bi->errstr);
2811 g_fprintf(stderr, _(" delay: Total size now %lld.\n"),
2812 (long long)total_size);
2819 * Remove a dump or modify it from full to incremental.
2820 * Keep track of it on the bi q in case we can add it back later.
/*
 * Takes dp's currently chosen dump out of the running totals, records its
 * level and sizes in a new bi_t appended to the tail of biq, and builds
 * bi->errstr from the host name, planner_timestamp and the NULL-terminated
 * list of char * arguments that follow `delete`.
 *
 * The visible tail of the body both removes dp from schedq and switches dp
 * to its degr_est (adding that estimate back into total_size).
 * NOTE(review): the branch that selects between those two outcomes is not
 * visible in this excerpt; presumably it tests `delete` -- confirm.
 */
2823 static void delay_one_dump,
2829 char level_str[NUM_STR_SIZE];
2832 char *qname = quote_string(dp->name);
2833 char *errstr, *qerrstr;
2835 arglist_start(argp, delete);
/* the per-dump cost counted in total_size is block size + compressed size
 * + tape mark; the same three terms are added back further down */
2837 total_size -= (gint64)tt_blocksize_kb + est(dp)->dump_est->csize + (gint64)tape_mark;
2838 if(est(dp)->dump_est->level == 0) {
2839 total_lev0 -= (double) est(dp)->dump_est->csize;
2842 bi = alloc(SIZEOF(bi_t));
/* link the new record after the current tail of biq */
2844 bi->prev = biq.tail;
2845 if(biq.tail == NULL)
2848 biq.tail->next = bi;
2851 bi->deleted = delete;
/* remember the estimate as it was before the delay */
2853 bi->level = est(dp)->dump_est->level;
2854 bi->nsize = est(dp)->dump_est->nsize;
2855 bi->csize = est(dp)->dump_est->csize;
2857 g_snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_est->level);
/* "?" stands in for the timestamp when planner_timestamp is NULL */
2858 bi->errstr = vstralloc(dp->host->hostname,
2860 " ", planner_timestamp ? planner_timestamp : "?",
/* append each variadic string to errstr until the NULL terminator */
2865 while ((next = arglist_val(argp, char *)) != NULL) {
2866 vstrextend(&errstr, sep, next, NULL);
2869 strappend(errstr, "]");
2870 qerrstr = quote_string(errstr);
2871 vstrextend(&bi->errstr, " ", qerrstr, NULL);
2877 remove_disk(&schedq, dp);
2879 est(dp)->dump_est = est(dp)->degr_est;
2880 total_size += (gint64)tt_blocksize_kb + est(dp)->dump_est->csize + (gint64)tape_mark;
/*
 * Scores every disk on schedq as a candidate for promotion to a level 0
 * dump (the score is stored in est(dp)->promote) and keeps the best one in
 * dp_promote.  For the disk that is promoted, dump_est is switched to the
 * level 0 estimate, the previous dump_est is kept as degr_est,
 * next_level0 is set to 0, and total_size / total_lev0 are updated.
 *
 * Per the comment inside the body: returns 1 if a dump was promoted, and
 * must not make total_size exceed tape_length.
 *
 * NOTE(review): the `continue` / `return` lines and the test that selects
 * dp_promote after the loop are not visible in this excerpt.
 */
2887 static int promote_highest_priority_incremental(void)
2889 disk_t *dp, *dp1, *dp_promote;
2890 gint64 new_total, new_lev0;
2892 int nb_today, nb_same_day, nb_today2;
2893 int nb_disk_today, nb_disk_same_day;
2897 * return 1 if did so; must update total_size correctly; must not
2898 * cause total_size to exceed tape_length
2902 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2903 one_est_t *level0_est = est_for_level(dp, 0);
/* starting score; replaced further down once a real score is computed */
2904 est(dp)->promote = -1000;
2906 if (level0_est->nsize <= (gint64)0)
2909 if(est(dp)->next_level0 <= 0)
2912 if(est(dp)->next_level0 > dp->maxpromoteday)
/* totals as they would be if this disk were dumped at level 0 instead */
2915 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
2916 new_lev0 = (gint64)total_lev0 + level0_est->csize;
2921 nb_disk_same_day = 0;
/* compare against every scheduled disk: level 0 today versus due on the
 * same next_level0 day, overall and for disks on the same host */
2922 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2923 if(est(dp1)->dump_est->level == 0)
2925 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2927 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2928 if(est(dp1)->dump_est->level == 0)
2930 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2935 /* do not promote if overflow tape */
2936 if(new_total > tape_length)
2939 /* do not promote if overflow balanced size and something today */
2940 /* promote if nothing today */
2941 if((new_lev0 > (gint64)(balanced_size + balance_threshold)) &&
2942 (nb_disk_today > 0))
2945 /* do not promote if only one disk due that day and nothing today */
2946 if(nb_disk_same_day == 1 && nb_disk_today == 0)
/* nb_today2 is nb_today squared; it is weighed against nb_same_day */
2949 nb_today2 = nb_today*nb_today;
2950 if(nb_today == 0 && nb_same_day > 1)
2953 if(nb_same_day >= nb_today2) {
2954 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2955 conf_dumpcycle - est(dp)->next_level0;
2958 est(dp)->promote = -nb_today2 +
2959 conf_dumpcycle - est(dp)->next_level0;
2962 qname = quote_string(dp->name);
/* a strictly higher score replaces the current best candidate */
2963 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2965 g_fprintf(stderr," try %s:%s %d %d %d = %d\n",
2966 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2969 g_fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2970 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2976 one_est_t *level0_est;
2978 level0_est = est_for_level(dp, 0);
2980 qname = quote_string(dp->name);
2981 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
2982 new_lev0 = (gint64)total_lev0 + level0_est->csize;
2984 total_size = new_total;
2985 total_lev0 = (double)new_lev0;
/* remember how many days ahead the level 0 was due, for the message */
2986 check_days = est(dp)->next_level0;
/* the estimate scheduled so far becomes degr_est; level 0 takes over */
2987 est(dp)->degr_est = est(dp)->dump_est;
2988 est(dp)->dump_est = level0_est;
2989 est(dp)->next_level0 = 0;
2992 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2993 dp->host->hostname, qname,
2994 total_lev0, (long long)total_size);
2997 plural(_("Full dump of %s:%s promoted from %d day ahead."),
2998 _("Full dump of %s:%s promoted from %d days ahead."),
3000 dp->host->hostname, qname, check_days);
/*
 * Builds a table with one entry per day of the dump cycle (indexed by
 * next_level0, at most my_dumpcycle entries) summing last_lev0size for the
 * disks due that day, then looks for the day with the largest summed size
 * among days having more than one disk (a "hill") and tries to promote one
 * of that day's disks to a level 0 dump now, provided the new total does
 * not exceed tape_length.  A hill whose disks are all unsuitable has its
 * disk count zeroed so it is not picked again.
 *
 * NOTE(review): the enclosing search loop, the per-day disk counting and
 * the return statements are not visible in this excerpt.
 */
3008 static int promote_hills(void)
3011 struct balance_stats {
3022 /* If we are already doing a level 0 don't bother */
3026 /* Do the guts of an "amadmin balance" */
3027 my_dumpcycle = conf_dumpcycle;
/* cap the number of table entries at 10000 */
3028 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
3030 sp = (struct balance_stats *)
3031 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
3033 for(days = 0; days < my_dumpcycle; days++) {
3035 sp[days].size = (gint64)0;
3038 for(dp = schedq.head; dp != NULL; dp = dp->next) {
3039 days = est(dp)->next_level0;
/* a negative next_level0 is counted in the entry for day 0 */
3040 if (days < 0) days = 0;
3041 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
3042 dp->strategy != DS_INCRONLY) {
3044 sp[days].size += est(dp)->last_lev0size;
3048 /* Search for a suitable big hill and cut it down */
3050 /* Find the tallest hill */
3051 hill_size = (gint64)0;
3052 for(days = 0; days < my_dumpcycle; days++) {
3053 if(sp[days].disks > 1 && sp[days].size > hill_size) {
3054 hill_size = sp[days].size;
3059 if(hill_size <= (gint64)0) break; /* no suitable hills */
3061 /* Find all the dumps in that hill and try and remove one */
3062 for(dp = schedq.head; dp != NULL; dp = dp->next) {
3063 one_est_t *level0_est;
3064 if(est(dp)->next_level0 != hill_days ||
3065 est(dp)->next_level0 > dp->maxpromoteday ||
3067 dp->strategy == DS_NOFULL ||
3068 dp->strategy == DS_INCRONLY)
3070 level0_est = est_for_level(dp, 0);
3071 if (level0_est->nsize <= (gint64)0)
/* total as it would be with this disk dumped at level 0 instead */
3073 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
3074 if(new_total > tape_length)
3076 /* We found a disk we can promote */
3077 qname = quote_string(dp->name);
3078 total_size = new_total;
3079 total_lev0 += (double)level0_est->csize;
/* the estimate scheduled so far becomes degr_est; level 0 takes over */
3080 est(dp)->degr_est = est(dp)->dump_est;
3081 est(dp)->dump_est = level0_est;
3082 est(dp)->next_level0 = 0;
3085 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
3086 dp->host->hostname, qname,
3087 total_lev0, (long long)total_size);
3090 plural(_("Full dump of %s:%s specially promoted from %d day ahead."),
3091 _("Full dump of %s:%s specially promoted from %d days ahead."),
3093 dp->host->hostname, qname, hill_days);
3099 /* All the disks in that hill were unsuitable. */
3100 sp[hill_days].disks = 0; /* Don't get tricked again */
3108 * ========================================================================
3111 * XXX - memory leak - we shouldn't just throw away *dp
/*
 * Emits the schedule entry for one disk.  When the chosen estimate has a
 * csize of -1 the disk is reported as FAILED "[no estimate]" on stderr and
 * through log_add().  Otherwise each numeric field is formatted into its
 * own string buffer, a line starting with "DUMP " is assembled with
 * vstralloc() (host name, planner_timestamp, priority, level, sizes and,
 * when available, the degr_est fields or a quoted degraded-mode message),
 * and that line is written to both stdout and stderr.
 *
 * NOTE(review): the parameter list and several argument lines of the
 * vstralloc()/g_snprintf() calls are not visible in this excerpt;
 * presumably the FAILED path returns before the formatting -- confirm.
 */
3113 static void output_scheduleline(
3117 time_t dump_time = 0, degr_time = 0;
3118 double dump_kps = 0, degr_kps = 0;
3119 char *schedline = NULL, *degr_str = NULL;
3120 char dump_priority_str[NUM_STR_SIZE];
3121 char dump_level_str[NUM_STR_SIZE];
3122 char dump_nsize_str[NUM_STR_SIZE];
3123 char dump_csize_str[NUM_STR_SIZE];
3124 char dump_time_str[NUM_STR_SIZE];
3125 char dump_kps_str[NUM_STR_SIZE];
3126 char degr_level_str[NUM_STR_SIZE];
3127 char degr_nsize_str[NUM_STR_SIZE];
3128 char degr_csize_str[NUM_STR_SIZE];
3129 char degr_time_str[NUM_STR_SIZE];
3130 char degr_kps_str[NUM_STR_SIZE];
3131 char *dump_date, *degr_date;
3133 char *qname = quote_string(dp->name);
3137 if(ep->dump_est->csize == (gint64)-1) {
3138 /* no estimate, fail the disk */
3140 _("%s: FAILED %s %s %s %d \"[no estimate]\"\n"),
3142 dp->host->hostname, qname, planner_timestamp, ep->dump_est->level);
3143 log_add(L_FAIL, _("%s %s %s %d [no estimate]"),
3144 dp->host->hostname, qname, planner_timestamp, ep->dump_est->level);
3149 dump_date = ep->dump_est->dumpdate;
3150 degr_date = ep->degr_est->dumpdate;
/* rates below 1.0 are replaced by DEFAULT_DUMPRATE.
 * NOTE(review): the macro argument is not parenthesised in the expansion;
 * harmless for the plain member accesses passed here */
3152 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
3154 if(ep->dump_est->level == 0) {
/* estimated duration = compressed size divided by the rate */
3155 dump_kps = fix_rate(ep->fullrate);
3156 dump_time = (time_t)((double)ep->dump_est->csize / dump_kps);
3158 if(ep->degr_est->csize != (gint64)-1) {
3159 degr_kps = fix_rate(ep->incrrate);
3160 degr_time = (time_t)((double)ep->degr_est->csize / degr_kps);
3164 dump_kps = fix_rate(ep->incrrate);
3165 dump_time = (time_t)((double)ep->dump_est->csize / dump_kps);
/* degr_est fields are only formatted for a level 0 dump whose degr_est
 * has a csize other than -1 */
3168 if(ep->dump_est->level == 0 && ep->degr_est->csize != (gint64)-1) {
3169 g_snprintf(degr_level_str, sizeof(degr_level_str),
3170 "%d", ep->degr_est->level);
3171 g_snprintf(degr_nsize_str, sizeof(degr_nsize_str),
3172 "%lld", (long long)ep->degr_est->nsize);
3173 g_snprintf(degr_csize_str, sizeof(degr_csize_str),
3174 "%lld", (long long)ep->degr_est->csize);
3175 g_snprintf(degr_time_str, sizeof(degr_time_str),
3176 "%lld", (long long)degr_time);
3177 g_snprintf(degr_kps_str, sizeof(degr_kps_str),
3179 degr_str = vstralloc(" ", degr_level_str,
3181 " ", degr_nsize_str,
3182 " ", degr_csize_str,
3188 if (ep->degr_mesg) {
3189 degr_mesg = quote_string(ep->degr_mesg);
3191 degr_mesg = quote_string(_("Skipping: cannot dump in degraded mode for unknown reason"));
3193 degr_str = vstralloc(" ", degr_mesg, NULL);
3196 g_snprintf(dump_priority_str, SIZEOF(dump_priority_str),
3197 "%d", ep->dump_priority);
3198 g_snprintf(dump_level_str, SIZEOF(dump_level_str),
3199 "%d", ep->dump_est->level);
3200 g_snprintf(dump_nsize_str, sizeof(dump_nsize_str),
3201 "%lld", (long long)ep->dump_est->nsize);
3202 g_snprintf(dump_csize_str, sizeof(dump_csize_str),
3203 "%lld", (long long)ep->dump_est->csize);
3204 g_snprintf(dump_time_str, sizeof(dump_time_str),
3205 "%lld", (long long)dump_time);
3206 g_snprintf(dump_kps_str, sizeof(dump_kps_str),
3208 features = am_feature_to_string(dp->host->features);
3209 schedline = vstralloc("DUMP ",dp->host->hostname,
3212 " ", planner_timestamp,
3213 " ", dump_priority_str,
3214 " ", dump_level_str,
3216 " ", dump_nsize_str,
3217 " ", dump_csize_str,
3220 degr_str ? degr_str : "",
3223 if (est(dp)->dump_est->guessed == 1) {
/* NOTE(review): this format string ends in "\n", unlike the other
 * log_add() format strings visible in this file -- confirm it is wanted */
3224 log_add(L_WARNING, _("WARNING: no history available for %s:%s; guessing that size will be %lld KB\n"), dp->host->hostname, qname, (long long)est(dp)->dump_est->csize);
/* the same line goes to both stdout and stderr */
3226 fputs(schedline, stdout);
3227 fputs(schedline, stderr);
3244 size = internal_server_estimate(dp, info, level, &stats);
3246 est(dp)->dump_est = &est(dp)->estimate[i];
3247 est(dp)->estimate[i].nsize = size;
3249 est(dp)->estimate[i].guessed = 1;