2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: planner.c 10421 2008-03-06 18:48:30Z martineau $
29 * backup schedule planner for the Amanda backup system.
43 #include "amfeatures.h"
44 #include "server_util.h"
46 #include "timestamp.h"
49 #define planner_debug(i,x) do { \
50 if ((i) <= debug_planner) { \
55 #define MAX_LEVELS 3 /* max# of estimates per filesys */
57 #define RUNS_REDZONE 5 /* should be in conf file? */
59 #define PROMOTE_THRESHOLD 0.05 /* if <5% unbalanced, don't promote */
60 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
62 /* configuration file stuff */
65 gint64 conf_maxdumpsize;
68 int conf_runspercycle;
73 int conf_usetimestamps;
75 #define HOST_READY ((void *)0) /* must be 0 */
76 #define HOST_ACTIVE ((void *)1)
77 #define HOST_DONE ((void *)2)
79 #define DISK_READY 0 /* must be 0 */
81 #define DISK_PARTIALY_DONE 2
84 typedef struct est_s {
89 gint64 dump_nsize; /* native size */
90 gint64 dump_csize; /* compressed size */
91 int degr_level; /* if dump_level == 0, what would be the inc level */
92 gint64 degr_nsize; /* native degraded size */
93 gint64 degr_csize; /* compressed degraded size */
100 double fullrate, incrrate;
101 double fullcomp, incrcomp;
103 int level[MAX_LEVELS];
104 char *dumpdate[MAX_LEVELS];
105 gint64 est_size[MAX_LEVELS];
109 #define est(dp) ((est_t *)(dp)->up)
111 /* pestq = partial estimate */
112 disklist_t startq, waitq, pestq, estq, failq, schedq;
114 double total_lev0, balanced_size, balance_threshold;
120 size_t tt_blocksize_kb;
121 int runs_per_cycle = 0;
123 char *planner_timestamp = NULL;
125 static am_feature_t *our_features = NULL;
126 static char *our_feature_string = NULL;
128 /* We keep a LIFO queue of before images for all modifications made
129 * to schedq in our attempt to make the schedule fit on the tape.
130 * Enough information is stored to reinstate a dump if it turns out
131 * that it shouldn't have been touched after all.
133 typedef struct bi_s {
136 int deleted; /* 0=modified, 1=deleted */
137 disk_t *dp; /* The disk that was changed */
138 int level; /* The original level */
139 gint64 nsize; /* The original native size */
140 gint64 csize; /* The original compressed size */
141 char *errstr; /* A message describing why this disk is here */
144 typedef struct bilist_s {
148 bilist_t biq; /* The BI queue itself */
151 * ========================================================================
156 static void setup_estimate(disk_t *dp);
157 static void get_estimates(void);
158 static void analyze_estimate(disk_t *dp);
159 static void handle_failed(disk_t *dp);
160 static void delay_dumps(void);
161 static int promote_highest_priority_incremental(void);
162 static int promote_hills(void);
163 static void output_scheduleline(disk_t *dp);
164 int main(int, char **);
180 times_t section_start;
184 config_overwrites_t *cfg_ovr = NULL;
185 char *cfg_opt = NULL;
187 int exit_status = EXIT_SUCCESS;
190 * Configure program for internationalization:
191 * 1) Only set the message locale for now.
192 * 2) Set textdomain for all amanda related programs to "amanda"
193 * We don't want to be forced to support dozens of message catalogs.
195 setlocale(LC_MESSAGES, "C");
196 textdomain("amanda");
198 /* drop root privileges */
199 planner_setuid = set_root_privs(0);
203 set_pname("planner");
205 dbopen(DBG_SUBDIR_SERVER);
207 cfg_ovr = extract_commandline_config_overwrites(&argc, &argv);
211 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD, cfg_opt);
212 apply_config_overwrites(cfg_ovr);
214 /* conf_diskfile is freed later, as it may be used in an error message */
215 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
216 read_diskfile(conf_diskfile, &origq);
218 /* Don't die when child closes pipe */
219 signal(SIGPIPE, SIG_IGN);
221 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
223 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
224 set_logerror(logerror);
226 if (!planner_setuid) {
227 error(_("planner must be run setuid root"));
230 if (config_errors(NULL) >= CFGERR_ERRORS) {
231 g_critical(_("errors processing config file"));
236 check_running_as(RUNNING_AS_DUMPUSER);
238 dbrename(get_config_name(), DBG_SUBDIR_SERVER);
241 section_start = curclock();
243 our_features = am_init_feature_set();
244 our_feature_string = am_feature_to_string(our_features);
246 log_add(L_INFO, "%s pid %ld", get_pname(), (long)getpid());
247 g_fprintf(stderr, _("%s: pid %ld executable %s version %s\n"),
248 get_pname(), (long) getpid(), argv[0], version());
249 for (i = 0; version_info[i] != NULL; i++)
250 g_fprintf(stderr, _("%s: %s"), get_pname(), version_info[i]);
253 if (argc > 3 && strcmp(argv[2], "--starttime") == 0) {
254 planner_timestamp = stralloc(argv[3]);
260 * 1. Networking Setup
267 * 2. Read in Configuration Information
269 * All the Amanda configuration files are loaded before we begin.
272 g_fprintf(stderr,_("READING CONF INFO...\n"));
274 if(origq.head == NULL) {
275 error(_("empty disklist \"%s\""), conf_diskfile);
279 errstr = match_disklist(&origq, argc-diskarg_offset,
280 argv+diskarg_offset);
282 g_fprintf(stderr,"%s",errstr);
284 exit_status = EXIT_FAILURE;
287 for(dp = origq.head; dp != NULL; dp = dp->next) {
289 qname = quote_string(dp->name);
290 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
297 error(_("no DLE to backup"));
300 amfree(conf_diskfile);
302 conf_tapelist = config_dir_relative(getconf_str(CNF_TAPELIST));
303 if(read_tapelist(conf_tapelist)) {
304 error(_("could not load tapelist \"%s\""), conf_tapelist);
307 amfree(conf_tapelist);
309 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
310 if(open_infofile(conf_infofile)) {
311 error(_("could not open info db \"%s\""), conf_infofile);
314 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
315 log_add(L_WARNING, "problem copying infofile: %s", errstr);
318 amfree(conf_infofile);
320 conf_tapetype = getconf_str(CNF_TAPETYPE);
321 conf_maxdumpsize = getconf_int64(CNF_MAXDUMPSIZE);
322 conf_runtapes = getconf_int(CNF_RUNTAPES);
323 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
324 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
325 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
326 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
327 conf_reserve = getconf_int(CNF_RESERVE);
328 conf_autoflush = getconf_boolean(CNF_AUTOFLUSH);
329 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
332 if (planner_timestamp) {
333 if (conf_usetimestamps == 0) {
334 planner_timestamp[8] = '\0';
336 } else if(conf_usetimestamps == 0) {
337 planner_timestamp = get_datestamp_from_time(0);
340 planner_timestamp = get_timestamp_from_time(0);
342 log_add(L_START, _("date %s"), planner_timestamp);
343 g_printf("DATE %s\n", planner_timestamp);
345 g_fprintf(stderr, _("%s: timestamp %s\n"),
346 get_pname(), planner_timestamp);
348 /* some initializations */
350 if(conf_runspercycle == 0) {
351 runs_per_cycle = conf_dumpcycle;
352 } else if(conf_runspercycle == -1 ) {
353 runs_per_cycle = guess_runs_from_tapelist();
355 runs_per_cycle = conf_runspercycle;
357 if (runs_per_cycle <= 0) {
362 * do some basic sanity checking
364 if(conf_tapecycle <= runs_per_cycle) {
365 log_add(L_WARNING, _("tapecycle (%d) <= runspercycle (%d)"),
366 conf_tapecycle, runs_per_cycle);
369 tape = lookup_tapetype(conf_tapetype);
370 if(conf_maxdumpsize > (gint64)0) {
371 tape_length = conf_maxdumpsize;
374 tape_length = tapetype_get_length(tape) * (gint64)conf_runtapes;
376 tape_mark = (size_t)tapetype_get_filemark(tape);
377 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
378 tt_blocksize = tt_blocksize_kb * 1024;
380 g_fprintf(stderr, _("%s: time %s: startup took %s secs\n"),
382 walltime_str(curclock()),
383 walltime_str(timessub(curclock(), section_start)));
386 * 3. Send autoflush dumps left on the holding disks
388 * This should give us something to do while we generate the new
392 g_fprintf(stderr,_("\nSENDING FLUSHES...\n"));
396 GSList *holding_list, *holding_file;
397 char *qdisk, *qhname;
399 /* get *all* flushable files in holding */
400 holding_list = holding_get_files_for_flush(NULL);
401 for(holding_file=holding_list; holding_file != NULL;
402 holding_file = holding_file->next) {
403 holding_file_get_dumpfile((char *)holding_file->data, &file);
405 if (holding_file_size((char *)holding_file->data, 1) <= 0) {
406 log_add(L_INFO, "%s: removing file with no data.",
407 (char *)holding_file->data);
408 holding_file_unlink((char *)holding_file->data);
409 dumpfile_free_data(&file);
413 qdisk = quote_string(file.disk);
414 qhname = quote_string((char *)holding_file->data);
415 log_add(L_DISK, "%s %s", file.name, qdisk);
417 "FLUSH %s %s %s %d %s\n",
424 "FLUSH %s %s %s %d %s\n",
432 dumpfile_free_data(&file);
434 g_slist_free_full(holding_list);
437 g_fprintf(stderr, _("ENDFLUSH\n"));
438 g_fprintf(stdout, _("ENDFLUSH\n"));
442 * 4. Calculate Preliminary Dump Levels
444 * Before we can get estimates from the remote slave hosts, we make a
445 * first attempt at guessing what dump levels we will be dumping at
446 * based on the curinfo database.
449 g_fprintf(stderr,_("\nSETTING UP FOR ESTIMATES...\n"));
450 section_start = curclock();
452 startq.head = startq.tail = NULL;
453 while(!empty(origq)) {
454 disk_t *dp = dequeue_disk(&origq);
460 g_fprintf(stderr, _("%s: time %s: setting up estimates took %s secs\n"),
462 walltime_str(curclock()),
463 walltime_str(timessub(curclock(), section_start)));
467 * 5. Get Dump Size Estimates from Remote Client Hosts
469 * Each host is queried (in parallel) for dump size information on all
470 * of its disks, and the results gathered as they come in.
473 /* go out and get the dump estimates */
475 g_fprintf(stderr,_("\nGETTING ESTIMATES...\n"));
476 section_start = curclock();
478 estq.head = estq.tail = NULL;
479 pestq.head = pestq.tail = NULL;
480 waitq.head = waitq.tail = NULL;
481 failq.head = failq.tail = NULL;
485 g_fprintf(stderr, _("%s: time %s: getting estimates took %s secs\n"),
487 walltime_str(curclock()),
488 walltime_str(timessub(curclock(), section_start)));
491 * At this point, all disks with estimates are in estq, and
492 * all the disks on hosts that didn't respond to our inquiry
496 dump_queue("FAILED", failq, 15, stderr);
497 dump_queue("DONE", estq, 15, stderr);
500 exit_status = EXIT_FAILURE;
504 * 6. Analyze Dump Estimates
506 * Each disk's estimates are looked at to determine what level it
507 * should dump at, and to calculate the expected size and time taking
508 * historical dump rates and compression ratios into account. The
509 * total expected size is accumulated as well.
512 g_fprintf(stderr,_("\nANALYZING ESTIMATES...\n"));
513 section_start = curclock();
515 /* an empty tape still has a label and an endmark */
516 total_size = ((gint64)tt_blocksize_kb + (gint64)tape_mark) * (gint64)2;
520 schedq.head = schedq.tail = NULL;
521 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
522 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
525 * At this point, all the disks are on schedq sorted by priority.
526 * The total estimated size of the backups is in total_size.
532 g_fprintf(stderr, _("INITIAL SCHEDULE (size %lld):\n"),
533 (long long)total_size);
534 for(dp = schedq.head; dp != NULL; dp = dp->next) {
535 qname = quote_string(dp->name);
536 g_fprintf(stderr, _(" %s %s pri %d lev %d nsize %lld csize %lld\n"),
537 dp->host->hostname, qname, est(dp)->dump_priority,
539 (long long)est(dp)->dump_nsize,
540 (long long)est(dp)->dump_csize);
547 * 7. Delay Dumps if Schedule Too Big
549 * If the generated schedule is too big to fit on the tape, we need to
550 * delay some full dumps to make room. Incrementals will be done
551 * instead (except for new or forced disks).
553 * In extreme cases, delaying all the full dumps is not even enough.
554 * If so, some low-priority incrementals will be skipped completely
555 * until the dumps fit on the tape.
558 g_fprintf(stderr, _("\nDELAYING DUMPS IF NEEDED, total_size %lld, tape length %lld mark %zu\n"),
559 (long long)total_size,
560 (long long)tape_length,
563 initial_size = total_size;
567 /* XXX - why bother checking this? */
568 if(empty(schedq) && total_size < initial_size) {
569 error(_("cannot fit anything on tape, bailing out"));
575 * 8. Promote Dumps if Schedule Too Small
577 * Amanda attempts to balance the full dumps over the length of the
578 * dump cycle. If this night's full dumps are too small relative to
579 * the other nights, promote some high-priority full dumps that will be
580 * due for the next run, to full dumps for tonight, taking care not to
581 * overflow the tape size.
583 * This doesn't work too well for small sites. For these we scan ahead
584 * looking for nights that have an excessive number of dumps and promote
587 * Amanda never delays full dumps just for the sake of balancing the
588 * schedule, so it can take a full cycle to balance the schedule after
593 _("\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n"),
594 total_lev0, balanced_size);
596 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
598 while((balanced_size - total_lev0) > balance_threshold && moved_one)
599 moved_one = promote_highest_priority_incremental();
601 moved_one = promote_hills();
603 g_fprintf(stderr, _("%s: time %s: analysis took %s secs\n"),
605 walltime_str(curclock()),
606 walltime_str(timessub(curclock(), section_start)));
612 * The schedule goes to stdout, presumably to driver. A copy is written
613 * on stderr for the debug file.
616 g_fprintf(stderr,_("\nGENERATING SCHEDULE:\n--------\n"));
618 exit_status = EXIT_FAILURE;
619 g_fprintf(stderr, _("--> Generated empty schedule! <--\n"));
621 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
623 g_fprintf(stderr, _("--------\n"));
626 log_add(L_FINISH, _("date %s time %s"), planner_timestamp, walltime_str(curclock()));
627 log_add(L_INFO, "pid-done %ld", (long)getpid());
630 amfree(planner_timestamp);
631 amfree(our_feature_string);
632 am_release_feature_set(our_features);
643 * ========================================================================
644 * SETUP FOR ESTIMATES
648 static void askfor(est_t *, int, int, info_t *);
649 static int last_level(info_t *info); /* subroutines */
650 static gint64 est_size(disk_t *dp, int level);
651 static gint64 est_tape_size(disk_t *dp, int level);
652 static int next_level0(disk_t *dp, info_t *info);
653 static int runs_at(info_t *info, int lev);
654 static gint64 bump_thresh(int level, gint64 size_level_0, int bumppercent, gint64 bumpsize, double bumpmult);
655 static int when_overwrite(char *label);
658 est_t *ep, /* estimate data block */
659 int seq, /* sequence number of request */
660 int lev, /* dump level being requested */
661 info_t *info) /* info block for disk */
663 if(seq < 0 || seq >= MAX_LEVELS) {
664 error(_("error [planner askfor: seq out of range 0..%d: %d]"),
668 if(lev < -1 || lev >= DUMP_LEVELS) {
669 error(_("error [planner askfor: lev out of range -1..%d: %d]"),
676 ep->dumpdate[seq] = (char *)0;
677 ep->est_size[seq] = (gint64)-2;
681 ep->level[seq] = lev;
683 ep->dumpdate[seq] = stralloc(get_dumpdate(info,lev));
685 ep->est_size[seq] = (gint64)-2;
700 assert(dp && dp->host);
702 qname = quote_string(dp->name);
703 g_fprintf(stderr, _("%s: time %s: setting up estimates for %s:%s\n"),
704 get_pname(), walltime_str(curclock()),
705 dp->host->hostname, qname);
707 /* get current information about disk */
709 if(get_info(dp->host->hostname, dp->name, &info)) {
710 /* no record for this disk, make a note of it */
711 log_add(L_INFO, _("Adding new disk %s:%s."), dp->host->hostname, qname);
714 /* setup working data struct for disk */
716 ep = alloc(SIZEOF(est_t));
717 dp->up = (void *) ep;
718 ep->state = DISK_READY;
719 ep->dump_nsize = (gint64)-1;
720 ep->dump_csize = (gint64)-1;
721 ep->dump_priority = dp->priority;
725 ep->degr_mesg = NULL;
727 /* calculated fields */
729 if (ISSET(info.command, FORCE_FULL)) {
730 /* force a level 0, kind of like a new disk */
731 if(dp->strategy == DS_NOFULL) {
733 * XXX - Not sure what it means to force a no-full disk. The
734 * purpose of no-full is to just dump changes relative to a
735 * stable base, for example root partitions that vary only
736 * slightly from a site-wide prototype. Only the variations
739 * If we allow a level 0 onto the Amanda cycle, then we are
740 * hosed when that tape gets re-used next. Disallow this for
744 _("Cannot force full dump of %s:%s with no-full option."),
745 dp->host->hostname, qname);
747 /* clear force command */
748 CLR(info.command, FORCE_FULL);
749 if(put_info(dp->host->hostname, dp->name, &info)) {
750 error(_("could not put info record for %s:%s: %s"),
751 dp->host->hostname, qname, strerror(errno));
754 ep->last_level = last_level(&info);
755 ep->next_level0 = next_level0(dp, &info);
758 ep->degr_mesg = _("Can't switch to degraded mode when using a force-full disk");
760 ep->next_level0 = -conf_dumpcycle;
761 log_add(L_INFO, _("Forcing full dump of %s:%s as directed."),
762 dp->host->hostname, qname);
765 else if(dp->strategy == DS_NOFULL) {
766 /* force estimate of level 1 */
768 ep->next_level0 = next_level0(dp, &info);
771 ep->last_level = last_level(&info);
772 ep->next_level0 = next_level0(dp, &info);
775 /* adjust priority levels */
777 /* warn if dump will be overwritten */
778 if (ep->last_level > -1 && strlen(info.inf[0].label) > 0) {
779 overwrite_runs = when_overwrite(info.inf[0].label);
780 if(overwrite_runs == 0) {
781 log_add(L_WARNING, _("Last full dump of %s:%s "
782 "on tape %s overwritten on this run."),
783 dp->host->hostname, qname, info.inf[0].label);
784 } else if(overwrite_runs <= RUNS_REDZONE) {
786 plural(_("Last full dump of %s:%s on tape %s overwritten in %d run."),
787 _("Last full dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
788 dp->host->hostname, qname, info.inf[0].label,
793 /* warn if last level 1 will be overwritten */
794 if (ep->last_level > 1 && strlen(info.inf[1].label) > 0) {
795 overwrite_runs = when_overwrite(info.inf[1].label);
796 if(overwrite_runs == 0) {
797 log_add(L_WARNING, _("Last level 1 dump of %s:%s "
798 "on tape %s overwritten on this run, resetting to level 1"),
799 dp->host->hostname, qname, info.inf[1].label);
801 } else if(overwrite_runs <= RUNS_REDZONE) {
803 plural(_("Last level 1 dump of %s:%s on tape %s overwritten in %d run."),
804 _("Last level 1 dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
805 dp->host->hostname, qname, info.inf[1].label,
810 if(ep->next_level0 < 0) {
811 g_fprintf(stderr,plural(_("%s:%s overdue %d day for level 0\n"),
812 _("%s:%s overdue %d days for level 0\n"),
814 dp->host->hostname, qname, (-ep->next_level0));
815 ep->dump_priority -= ep->next_level0;
817 else if (ISSET(info.command, FORCE_FULL))
818 ep->dump_priority += 1;
819 /* else XXX bump up the priority of incrementals that failed last night */
821 /* handle external level 0 dumps */
823 if(dp->skip_full && dp->strategy != DS_NOINC) {
824 if(ep->next_level0 <= 0) {
825 /* update the date field */
826 info.inf[0].date = today;
827 CLR(info.command, FORCE_FULL);
828 ep->next_level0 += conf_dumpcycle;
830 if(put_info(dp->host->hostname, dp->name, &info)) {
831 error(_("could not put info record for %s:%s: %s"),
832 dp->host->hostname, qname, strerror(errno));
835 log_add(L_INFO, _("Skipping full dump of %s:%s today."),
836 dp->host->hostname, qname);
837 g_fprintf(stderr,_("%s:%s lev 0 skipped due to skip-full flag\n"),
838 dp->host->hostname, qname);
839 /* don't enqueue the disk */
840 askfor(ep, 0, -1, &info);
841 askfor(ep, 1, -1, &info);
842 askfor(ep, 2, -1, &info);
843 g_fprintf(stderr, _("%s: SKIPPED %s %s 0 [skip-full]\n"),
844 get_pname(), dp->host->hostname, qname);
845 log_add(L_SUCCESS, _("%s %s %s 0 [skipped: skip-full]"),
846 dp->host->hostname, qname, planner_timestamp);
851 if(ep->last_level == -1) {
852 /* probably a new disk, but skip-full means no full! */
856 if(ep->next_level0 == 1) {
857 log_add(L_WARNING, _("Skipping full dump of %s:%s tomorrow."),
858 dp->host->hostname, qname);
862 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info.command, FORCE_FULL)) {
863 /* don't enqueue the disk */
864 askfor(ep, 0, -1, &info);
865 askfor(ep, 1, -1, &info);
866 askfor(ep, 2, -1, &info);
867 log_add(L_FAIL, _("%s %s 19000101 1 [Skipping incronly because no full dump were done]"),
868 dp->host->hostname, qname);
869 g_fprintf(stderr,_("%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n"),
870 dp->host->hostname, qname);
875 /* handle "skip-incr" type archives */
877 if(dp->skip_incr && ep->next_level0 > 0) {
878 g_fprintf(stderr,_("%s:%s lev 1 skipped due to skip-incr flag\n"),
879 dp->host->hostname, qname);
880 /* don't enqueue the disk */
881 askfor(ep, 0, -1, &info);
882 askfor(ep, 1, -1, &info);
883 askfor(ep, 2, -1, &info);
885 g_fprintf(stderr, _("%s: SKIPPED %s %s 1 [skip-incr]\n"),
886 get_pname(), dp->host->hostname, qname);
888 log_add(L_SUCCESS, _("%s %s %s 1 [skipped: skip-incr]"),
889 dp->host->hostname, qname, planner_timestamp);
894 if( ep->last_level == -1 && ep->next_level0 > 0 &&
895 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
896 conf_reserve == 100) {
897 log_add(L_WARNING, _("%s:%s mismatch: no tapelist record, "
898 "but curinfo next_level0: %d."),
899 dp->host->hostname, qname, ep->next_level0);
903 if(ep->last_level == 0) ep->level_days = 0;
904 else ep->level_days = runs_at(&info, ep->last_level);
905 ep->last_lev0size = info.inf[0].csize;
907 ep->fullrate = perf_average(info.full.rate, 0.0);
908 ep->incrrate = perf_average(info.incr.rate, 0.0);
910 ep->fullcomp = perf_average(info.full.comp, dp->comprate[0]);
911 ep->incrcomp = perf_average(info.incr.comp, dp->comprate[1]);
913 /* determine which estimates to get */
917 if (dp->strategy == DS_NOINC ||
919 (!ISSET(info.command, FORCE_BUMP) ||
921 ep->last_level == -1))) {
922 if(info.command & FORCE_BUMP && ep->last_level == -1) {
924 _("Remove force-bump command of %s:%s because it's a new disk."),
925 dp->host->hostname, qname);
927 switch (dp->strategy) {
930 askfor(ep, i++, 0, &info);
932 log_add(L_INFO, _("Ignoring skip_full for %s:%s "
933 "because the strategy is NOINC."),
934 dp->host->hostname, qname);
936 if(info.command & FORCE_BUMP) {
938 _("Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC."),
939 dp->host->hostname, qname);
948 if (ISSET(info.command, FORCE_FULL))
954 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
955 if(ep->last_level == -1) { /* a new disk */
956 if (ep->degr_mesg == NULL)
957 ep->degr_mesg = _("Can't switch to degraded mode when using a new disk");
958 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
959 askfor(ep, i++, 1, &info);
961 assert(!dp->skip_full); /* should be handled above */
963 } else { /* not new, pick normally */
966 curr_level = ep->last_level;
968 if (ISSET(info.command, FORCE_NO_BUMP)) {
969 if(curr_level > 0) { /* level 0 already asked for */
970 askfor(ep, i++, curr_level, &info);
972 log_add(L_INFO,_("Preventing bump of %s:%s as directed."),
973 dp->host->hostname, qname);
974 ep->degr_mesg = _("Can't switch to degraded mode when using a force-no-bump disk");
975 } else if (ISSET(info.command, FORCE_BUMP)
976 && curr_level + 1 < DUMP_LEVELS) {
977 askfor(ep, i++, curr_level+1, &info);
978 log_add(L_INFO,_("Bumping of %s:%s at level %d as directed."),
979 dp->host->hostname, qname, curr_level+1);
980 ep->degr_mesg = _("Can't switch to degraded mode when using a force-bump disk");
981 } else if (curr_level == 0) {
982 askfor(ep, i++, 1, &info);
984 askfor(ep, i++, curr_level, &info);
986 * If last time we dumped less than the threshold, then this
987 * time we will too, OR the extra size will be charged to both
988 * curr_level and curr_level + 1, so we will never bump. Also,
989 * if we haven't been at this level 2 days, or the dump failed
990 * last night, we can't bump.
992 if((info.inf[curr_level].size == (gint64)0 || /* no data, try it anyway */
993 (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
994 && ep->level_days >= dp->bumpdays))
995 && curr_level + 1 < DUMP_LEVELS) {
996 askfor(ep, i++, curr_level+1, &info);
1002 while(i < MAX_LEVELS) /* mark end of estimates */
1003 askfor(ep, i++, -1, &info);
1007 g_fprintf(stderr, _("setup_estimate: %s:%s: command %u, options: %s "
1008 "last_level %d next_level0 %d level_days %d getting estimates "
1009 "%d (%lld) %d (%lld) %d (%lld)\n"),
1010 dp->host->hostname, qname, info.command,
1011 dp->strategy == DS_NOFULL ? "no-full" :
1012 dp->strategy == DS_INCRONLY ? "incr-only" :
1013 dp->skip_full ? "skip-full" :
1014 dp->skip_incr ? "skip-incr" : "none",
1015 ep->last_level, ep->next_level0, ep->level_days,
1016 ep->level[0], (long long)ep->est_size[0],
1017 ep->level[1], (long long)ep->est_size[1],
1018 ep->level[2], (long long)ep->est_size[2]);
1020 assert(ep->level[0] != -1);
1021 enqueue_disk(&startq, dp);
1025 static int when_overwrite(
1031 runtapes = conf_runtapes;
1032 if(runtapes == 0) runtapes = 1;
1034 if((tp = lookup_tapelabel(label)) == NULL)
1035 return 1; /* "shouldn't happen", but trigger warning message */
1036 else if(tp->reuse == 0)
1038 else if(lookup_nb_tape() > conf_tapecycle)
1039 return (lookup_nb_tape() - tp->position) / runtapes;
1041 return (conf_tapecycle - tp->position) / runtapes;
1044 /* Return the estimated size for a particular dump */
1045 static gint64 est_size(
1051 for(i = 0; i < MAX_LEVELS; i++) {
1052 if(level == est(dp)->level[i])
1053 return est(dp)->est_size[i];
1058 /* Return the estimated on-tape size of a particular dump */
1059 static gint64 est_tape_size(
1066 size = est_size(dp, level);
1068 if(size == (gint64)-1) return size;
1070 if(dp->compress == COMP_NONE)
1073 if(level == 0) ratio = est(dp)->fullcomp;
1074 else ratio = est(dp)->incrcomp;
1077 * make sure over-inflated compression ratios don't throw off the
1078 * estimates, this is mostly for when you have a small dump getting
1079 * compressed which takes up a lot more disk/tape space relatively due
1080 * to the overhead of the compression. This is specifically for
1081 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1082 * (RUG@USM.Uni-Muenchen.DE)
1085 if(ratio > 1.1) ratio = 1.1;
1087 size = (gint64)((double)size * ratio);
1090 * Ratio can be very small in some error situations, so make sure
1091 * size goes back greater than zero. It may not be right, but
1092 * indicates we did get an estimate.
1094 if(size <= (gint64)0) {
1102 /* what was the level of the last successful dump to tape? */
1103 static int last_level(
1106 int min_pos, min_level, i;
1107 time_t lev0_date, last_date;
1110 if(info->last_level != -1)
1111 return info->last_level;
1113 /* to keep compatibility with old infofile */
1114 min_pos = 1000000000;
1118 for(i = 0; i < 9; i++) {
1119 if(conf_reserve < 100) {
1120 if(i == 0) lev0_date = info->inf[0].date;
1121 else if(info->inf[i].date < lev0_date) continue;
1122 if(info->inf[i].date > last_date) {
1123 last_date = info->inf[i].date;
1128 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1129 /* cull any entries from previous cycles */
1130 if(i == 0) lev0_date = info->inf[0].date;
1131 else if(info->inf[i].date < lev0_date) continue;
1133 if(tp->position < min_pos) {
1134 min_pos = tp->position;
1139 info->last_level = i;
1143 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc. */
1149 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1150 return 1; /* fake it */
1151 else if (dp->strategy == DS_NOINC)
1153 else if(info->inf[0].date < (time_t)0)
1154 return -days_diff(EPOCH, today); /* new disk */
1156 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1159 /* how many runs at current level? */
1164 tape_t *cur_tape, *old_tape;
1167 last = last_level(info);
1168 if(lev != last) return 0;
1169 if(lev == 0) return 1;
1171 if(info->consecutive_runs != -1)
1172 return info->consecutive_runs;
1174 /* to keep compatibility with old infofile */
1175 cur_tape = lookup_tapelabel(info->inf[lev].label);
1176 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1177 if(cur_tape == NULL || old_tape == NULL) return 0;
1179 if(conf_runtapes == 0)
1180 nb_runs = (old_tape->position - cur_tape->position) / 1;
1182 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1183 info->consecutive_runs = nb_runs;
1189 static gint64 bump_thresh(
1191 gint64 size_level_0,
1198 if ((bumppercent != 0) && (size_level_0 > (gint64)1024)) {
1199 bump = ((double)size_level_0 * (double)bumppercent) / 100.0;
1202 bump = (double)bumpsize;
1204 while(--level) bump = bump * bumpmult;
1206 return (gint64)bump;
1212 * ========================================================================
1213 * GET REMOTE DUMP SIZE ESTIMATES
1217 static void getsize(am_host_t *hostp);
1218 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
1219 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
/*
 * get_estimates - drive estimate collection for the disks on startq.
 *
 * startq is rescanned from its head for as long as a pass finds a host in
 * the HOST_READY state.  For such a host the pre-host-estimate and the
 * pre-DLE-estimate server scripts are run for each of the host's disks.
 * When a pass starts nothing, every disk still on waitq is failed, and each
 * disk on pestq is moved to estq if at least one requested level has a
 * positive size, otherwise to failq.
 *
 * NOTE(review): this listing has lost lines (local declarations, braces and
 * presumably the getsize() call and the todo tests), so the exact nesting
 * of the loops below cannot be confirmed from here.
 */
1222 static void get_estimates(void)
1226 int something_started;
1228 something_started = 1;
1229 while(something_started) {
1230 something_started = 0;
1231 for(dp = startq.head; dp != NULL; dp = dp->next) {
1233 if(hostp->up == HOST_READY) {
1234 something_started = 1;
/* Host-level scripts are invoked once per disk of the host. */
1235 for(dp1 = hostp->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1237 run_server_scripts(EXECUTE_ON_PRE_HOST_ESTIMATE,
1238 get_config_name(), dp1,
1239 est(dp1)->level[0]);
1241 for(dp1 = hostp->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1243 run_server_scripts(EXECUTE_ON_PRE_DLE_ESTIMATE,
1244 get_config_name(), dp1,
1245 est(dp1)->level[0]);
1250 * dp is no longer on startq, so dp->next is not valid
1251 * and we have to start all over.
/* Anything still waiting for a reply at this point never got one. */
1259 while(!empty(waitq)) {
1260 disk_t *dp = dequeue_disk(&waitq);
1261 est(dp)->errstr = _("hmm, disk was stranded on waitq");
1262 enqueue_disk(&failq, dp);
/* Disks with partial results: keep what arrived, discard the rest. */
1265 while(!empty(pestq)) {
1266 disk_t *dp = dequeue_disk(&pestq);
1267 char * qname = quote_string(dp->name);
/*
 * For each of the three level slots: a requested level whose size is still
 * negative has no usable estimate.  A size of exactly -1 is logged as
 * "failed"; the other message ("timed out") belongs to the remaining
 * negative values.  The slot is then cleared by setting its level to -1.
 */
1269 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (gint64)0) {
1270 if(est(dp)->est_size[0] == (gint64)-1) {
1271 log_add(L_WARNING, _("disk %s:%s, estimate of level %d failed."),
1272 dp->host->hostname, qname, est(dp)->level[0]);
1276 _("disk %s:%s, estimate of level %d timed out."),
1277 dp->host->hostname, qname, est(dp)->level[0]);
1279 est(dp)->level[0] = -1;
1282 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (gint64)0) {
1283 if(est(dp)->est_size[1] == (gint64)-1) {
1285 _("disk %s:%s, estimate of level %d failed."),
1286 dp->host->hostname, qname, est(dp)->level[1]);
1290 _("disk %s:%s, estimate of level %d timed out."),
1291 dp->host->hostname, qname, est(dp)->level[1]);
1293 est(dp)->level[1] = -1;
1296 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (gint64)0) {
1297 if(est(dp)->est_size[2] == (gint64)-1) {
1299 _("disk %s:%s, estimate of level %d failed."),
1300 dp->host->hostname, qname, est(dp)->level[2]);
1304 _("disk %s:%s, estimate of level %d timed out."),
1305 dp->host->hostname, qname, est(dp)->level[2]);
1307 est(dp)->level[2] = -1;
/* One surviving level with a positive size is enough to plan the disk. */
1310 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (gint64)0) ||
1311 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (gint64)0) ||
1312 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (gint64)0)) {
1313 enqueue_disk(&estq, dp);
1316 est(dp)->errstr = vstralloc("disk ", qname,
1317 _(", all estimate timed out"), NULL);
1318 enqueue_disk(&failq, dp);
/*
 * getsize - build and send one request to a host on behalf of its disks.
 *
 * While the host's feature list is unknown (hostp->features == NULL) a
 * "noop" request is sent, with CNF_CTIMEOUT as its timeout.  Once the
 * features are known a "sendsize" request is assembled: an OPTIONS header
 * limited to the fields the client supports, followed by one entry per
 * disk that is selected (todo) and in the DISK_READY state.  Disks whose
 * estimate is ES_SERVER get their sizes from the stored history instead.
 * The request is handed to protocol_sendreq() with handle_result() as the
 * reply callback.
 *
 * NOTE(review): this listing has lost lines (the parameter line, several
 * declarations, else branches, and the statements that append each entry to
 * req and count it in `estimates`), so the comments below describe only
 * what the surviving lines show.
 */
1324 static void getsize(
1327 char number[NUM_STR_SIZE], *req;
1330 time_t estimates, timeout;
1332 const security_driver_t *secdrv;
1334 char * qname, *b64disk = NULL;
1335 char * qdevice, *b64device = NULL;
1337 assert(hostp->disks != NULL);
/* Guard on the host state; the body of this test is not in the listing
 * (presumably an early return -- confirm). */
1339 if(hostp->up != HOST_READY) {
1344 * The first time through here we send a "noop" request. This will
1345 * return the feature list from the client if it supports that.
1346 * If it does not, handle_result() will set the feature list to an
1347 * empty structure. In either case, we do the disks on the second
1348 * (and subsequent) pass(es).
1350 if(hostp->features != NULL) { /* sendsize service */
/* Probe which OPTIONS fields this client understands; each field is
 * emitted below only when the matching feature is present. */
1354 int has_features = am_has_feature(hostp->features,
1355 fe_req_options_features);
1356 int has_hostname = am_has_feature(hostp->features,
1357 fe_req_options_hostname);
1358 int has_maxdumps = am_has_feature(hostp->features,
1359 fe_req_options_maxdumps);
1360 int has_config = am_has_feature(hostp->features,
1361 fe_req_options_config);
/* maxdumps is rendered as text so it can be concatenated into the header. */
1363 g_snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
1364 req = vstralloc("SERVICE ", "sendsize", "\n",
1366 has_features ? "features=" : "",
1367 has_features ? our_feature_string : "",
1368 has_features ? ";" : "",
1369 has_maxdumps ? "maxdumps=" : "",
1370 has_maxdumps ? number : "",
1371 has_maxdumps ? ";" : "",
1372 has_hostname ? "hostname=" : "",
1373 has_hostname ? hostp->hostname : "",
1374 has_hostname ? ";" : "",
1375 has_config ? "config=" : "",
1376 has_config ? get_config_name() : "",
1377 has_config ? ";" : "",
1380 req_len = strlen(req);
1381 req_len += 128; /* room for SECURITY ... */
/* Walk the host's disks; only selected disks that are still DISK_READY
 * are considered. */
1383 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1387 if(dp->todo == 0) continue;
1389 if(est(dp)->state != DISK_READY) continue;
1391 est(dp)->got_estimate = 0;
/* level[0] == -1: no level was requested for this disk, so it is done. */
1392 if(est(dp)->level[0] == -1) {
1393 est(dp)->state = DISK_DONE;
/* Quoted forms are used in the line-based request and in messages, the
 * amxml tags in the XML request. */
1397 qname = quote_string(dp->name);
1398 b64disk = amxml_format_tag("disk", dp->name);
1399 qdevice = quote_string(dp->device);
1401 b64device = amxml_format_tag("diskdevice", dp->device);
/* Taken for CLIENT and CALCSIZE estimates, and for any estimate type when
 * the client supports XML requests together with the XML estimate feature. */
1402 if (dp->estimate == ES_CLIENT ||
1403 dp->estimate == ES_CALCSIZE ||
1404 (am_has_feature(hostp->features, fe_req_xml) &&
1405 am_has_feature(hostp->features, fe_xml_estimate))) {
/* XML request form: the disk is described by one <dle> element. */
1409 if (am_has_feature(hostp->features, fe_req_xml)) {
1410 char *levelstr = NULL;
1411 char *spindlestr = NULL;
1412 char level[NUM_STR_SIZE];
1413 char spindle[NUM_STR_SIZE];
/* One <level> element per requested level; the list stops at the first -1. */
1417 for(i = 0; i < MAX_LEVELS; i++) {
1418 int lev = est(dp)->level[i];
1419 if (lev == -1) break;
1420 g_snprintf(level, SIZEOF(level), "%d", lev);
1421 vstrextend(&levelstr, " <level>",
1423 "</level>\n", NULL);
1425 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1426 spindlestr = vstralloc(" <spindle>",
1428 "</spindle>\n", NULL);
1429 o = xml_optionstr(dp, hostp->features, NULL, 0);
1431 error(_("problem with option string, check the dumptype definition.\n"));
/* DUMP and GNUTAR take one <program> form; any other program is sent as
 * APPLICATION, followed by the application description when one is set. */
1434 if (strcmp(dp->program,"DUMP") == 0 ||
1435 strcmp(dp->program,"GNUTAR") == 0) {
1436 l = vstralloc("<dle>\n",
1439 "</program>\n", NULL);
1441 l = vstralloc("<dle>\n",
1442 " <program>APPLICATION</program>\n",
1444 if (dp->application) {
1445 char *xml_app = xml_application(dp->application,
1447 vstrextend(&l, xml_app, NULL);
/* The <estimate> element is only sent to clients that advertise it. */
1452 if (am_has_feature(hostp->features, fe_xml_estimate)) {
1453 if (dp->estimate == ES_CLIENT) {
1454 vstrextend(&l, " <estimate>CLIENT</estimate>\n",
1456 } else if (dp->estimate == ES_SERVER) {
1457 vstrextend(&l, " <estimate>SERVER</estimate>\n",
1459 } else if (dp->estimate == ES_CALCSIZE) {
1460 vstrextend(&l, " <estimate>CALCSIZE</estimate>\n",
/* A client without calcsize support is switched to a CLIENT estimate;
 * note that this rewrites dp->estimate itself. */
1464 if (dp->estimate == ES_CALCSIZE) {
1465 if (!am_has_feature(hostp->features,
1466 fe_calcsize_estimate)) {
1468 _("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1469 hostp->hostname, qname);
1470 dp->estimate = ES_CLIENT;
1472 vstrextend(&l, " <calcsize>YES</calcsize>\n",
1476 vstrextend(&l, " ", b64disk, "\n", NULL);
1478 vstrextend(&l, " ", b64device, "\n", NULL);
1479 vstrextend(&l, levelstr, spindlestr, o, "</dle>\n", NULL);
/* Without XML support only DUMP and GNUTAR can be requested. */
1483 } else if (strcmp(dp->program,"DUMP") != 0 &&
1484 strcmp(dp->program,"GNUTAR") != 0) {
1485 est(dp)->errstr = newvstrallocf(est(dp)->errstr,
1486 _("does not support application-api"));
/* Line-based request form: one request line per requested level. */
1488 for(i = 0; i < MAX_LEVELS; i++) {
1490 char *exclude1 = "";
1491 char *exclude2 = "";
1492 char *excludefree = NULL;
1493 char *include1 = "";
1494 char *include2 = "";
1495 char *includefree = NULL;
1496 char spindle[NUM_STR_SIZE];
1497 char level[NUM_STR_SIZE];
1498 int lev = est(dp)->level[i];
1500 if(lev == -1) break;
1502 g_snprintf(level, SIZEOF(level), "%d", lev);
1503 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
/* Clients with fe_sendsize_req_options get the full option string.  The
 * exclude/include handling further down passes a single entry, and only
 * when the corresponding list holds exactly one element. */
1504 if (am_has_feature(hostp->features,
1505 fe_sendsize_req_options)){
1506 exclude1 = " OPTIONS |";
1507 exclude2 = optionstr(dp, hostp->features, NULL);
1508 if ( exclude2 == NULL ) {
1509 error(_("problem with option string, check the dumptype definition.\n"));
1511 excludefree = exclude2;
1514 if (dp->exclude_file &&
1515 dp->exclude_file->nb_element == 1) {
1516 exclude1 = " exclude-file=";
1517 exclude2 = quote_string(
1518 dp->exclude_file->first->name);
1519 excludefree = exclude2;
1521 else if (dp->exclude_list &&
1522 dp->exclude_list->nb_element == 1) {
1523 exclude1 = " exclude-list=";
1524 exclude2 = quote_string(
1525 dp->exclude_list->first->name);
1526 excludefree = exclude2;
1528 if (dp->include_file &&
1529 dp->include_file->nb_element == 1) {
1530 include1 = " include-file=";
1531 include2 = quote_string(
1532 dp->include_file->first->name);
1533 includefree = include2;
1535 else if (dp->include_list &&
1536 dp->include_list->nb_element == 1) {
1537 include1 = " include-list=";
1538 include2 = quote_string(
1539 dp->include_list->first->name);
1540 includefree = include2;
/* Same CALCSIZE fallback as in the XML form. */
1544 if (dp->estimate == ES_CALCSIZE &&
1545 !am_has_feature(hostp->features,
1546 fe_calcsize_estimate)) {
1548 _("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1549 hostp->hostname, qname);
1550 dp->estimate = ES_CLIENT;
1552 if(dp->estimate == ES_CLIENT)
1555 calcsize = "CALCSIZE ";
1557 l = vstralloc(calcsize,
1560 " ", dp->device ? qdevice : "",
1562 " ", est(dp)->dumpdate[i],
1564 " ", exclude1, exclude2,
1565 ((includefree != NULL) ? " " : ""),
1572 amfree(includefree);
1573 amfree(excludefree);
/* The disk leaves startq.  It then either becomes DISK_ACTIVE or is marked
 * DISK_DONE and failed; the test that selects between the two outcomes is
 * not in this listing. */
1576 remove_disk(&startq, dp);
1582 est(dp)->state = DISK_ACTIVE;
1584 est(dp)->state = DISK_DONE;
1585 if (est(dp)->errstr == NULL) {
1586 est(dp)->errstr = vstrallocf(
1587 _("Can't request estimate"));
1589 enqueue_disk(&failq, dp);
/* Server-side estimate: sizes are derived from the stored info record
 * rather than asked of the client. */
1592 if (dp->estimate == ES_SERVER) {
1595 get_info(dp->host->hostname, dp->name, &info);
1596 for(i = 0; i < MAX_LEVELS; i++) {
1598 int lev = est(dp)->level[i];
1600 if(lev == -1) break;
1601 if(lev == 0) { /* use latest level 0, should do extrapolation */
1602 gint64 est_size = (gint64)0;
1605 for(j=NB_HISTORY-2;j>=0;j--) {
1606 if(info.history[j].level == 0) {
1607 if(info.history[j].size < (gint64)0) continue;
1608 est_size = info.history[j].size;
1613 est(dp)->est_size[i] = est_size;
1615 else if(info.inf[lev].size > (gint64)1000) { /* stats */
1616 est(dp)->est_size[i] = info.inf[lev].size;
1619 est(dp)->est_size[i] = (gint64)1000000;
1622 else if(lev == est(dp)->last_level) {
1623 /* means of all X day at the same level */
1626 gint64 est_size_day[NB_DAY];
1627 int nb_est_day[NB_DAY];
1628 for(j=0;j<NB_DAY;j++) {
1629 est_size_day[j]=(gint64)0;
/* History entries with a non-positive level or a negative size are skipped. */
1633 for(j=NB_HISTORY-2;j>=0;j--) {
1634 if(info.history[j].level <= 0) continue;
1635 if(info.history[j].size < (gint64)0) continue;
1636 if(info.history[j].level==info.history[j+1].level) {
1637 if(nb_day <NB_DAY-1) nb_day++;
1638 est_size_day[nb_day] += info.history[j].size;
1639 nb_est_day[nb_day]++;
/* Pick the bucket for the upcoming run, clamped to the table, then step
 * back to the nearest bucket that has samples. */
1645 nb_day = info.consecutive_runs + 1;
1646 if(nb_day > NB_DAY-1) nb_day = NB_DAY-1;
1648 while(nb_day > 0 && nb_est_day[nb_day] == 0) nb_day--;
1650 if(nb_est_day[nb_day] > 0) {
1651 est(dp)->est_size[i] = est_size_day[nb_day] /
1652 (gint64)nb_est_day[nb_day];
1654 else if(info.inf[lev].size > (gint64)1000) { /* stats */
1655 est(dp)->est_size[i] = info.inf[lev].size;
1658 est(dp)->est_size[i] = (gint64)10000;
1661 else if(lev == est(dp)->last_level + 1) {
1662 /* means of all first day at a new level */
1663 gint64 est_size = (gint64)0;
1666 for(j=NB_HISTORY-2;j>=0;j--) {
1667 if(info.history[j].level <= 0) continue;
1668 if(info.history[j].size < (gint64)0) continue;
1669 if(info.history[j].level == info.history[j+1].level + 1 ) {
1670 est_size += info.history[j].size;
1675 est(dp)->est_size[i] = est_size / (gint64)nb_est;
1677 else if(info.inf[lev].size > (gint64)1000) { /* stats */
1678 est(dp)->est_size[i] = info.inf[lev].size;
1681 est(dp)->est_size[i] = (gint64)100000;
1685 g_fprintf(stderr,_("%s time %s: got result for host %s disk %s:"),
1686 get_pname(), walltime_str(curclock()),
1687 dp->host->hostname, qname);
1688 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1689 est(dp)->level[0], (long long)est(dp)->est_size[0],
1690 est(dp)->level[1], (long long)est(dp)->est_size[1],
1691 est(dp)->level[2], (long long)est(dp)->est_size[2]);
/* When the client lacks the XML estimate feature the disk is finished
 * here and goes straight from startq to estq. */
1692 if (!am_has_feature(hostp->features, fe_xml_estimate)) {
1693 est(dp)->state = DISK_DONE;
1694 remove_disk(&startq, dp);
1695 enqueue_disk(&estq, dp);
/* No estimate was requested from this host, so it is marked done. */
1702 if(estimates == 0) {
1704 hostp->up = HOST_DONE;
/* A negative etimeout is used as an absolute timeout; otherwise it is
 * scaled by the number of estimates. */
1708 if (conf_etimeout < 0) {
1709 timeout = - conf_etimeout;
1711 timeout = estimates * conf_etimeout;
1713 } else { /* noop service */
1714 req = vstralloc("SERVICE ", "noop", "\n",
1716 "features=", our_feature_string, ";",
1720 * We use ctimeout for the "noop" request because it should be
1721 * very fast and etimeout has other side effects.
1723 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
/* The security driver name is taken from the host's first disk. */
1726 secdrv = security_getdriver(hostp->disks->security_driver);
1727 if (secdrv == NULL) {
1728 hostp->up = HOST_DONE;
1730 _("Could not find security driver '%s' for host '%s'"),
1731 hostp->disks->security_driver, hostp->hostname);
1735 hostp->up = HOST_ACTIVE;
/* Disks with a request in flight are parked on waitq until the reply
 * callback picks them up. */
1737 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1741 if(est(dp)->state == DISK_ACTIVE) {
1742 est(dp)->errstr = NULL;
1743 enqueue_disk(&waitq, dp);
1747 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1748 req, timeout, handle_result, hostp);
/*
 * lookup_hostdisk - return the disk of host hp whose name equals str.
 *
 * The host's disks are walked through their hostnext links and compared
 * with strcmp(); the first exact match is returned.
 *
 * NOTE(review): the second parameter line and the no-match return are
 * missing from this listing (presumably NULL is returned -- confirm).
 */
1753 static disk_t *lookup_hostdisk(
1754 /*@keep@*/ am_host_t *hp,
1759 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1760 if(strcmp(str, dp->name) == 0) return dp;
/*
 * handle_result - reply callback for the request sent by getsize().
 *
 * datap is the am_host_t the request was sent for, pkt the reply packet
 * and sech the security handle of the connection.  The host is put back
 * into the HOST_READY state, the reply body is parsed (NAK error text, the
 * OPTIONS line carrying the client's feature list, host-level ERROR/WARNING
 * lines, and per-disk "disk level SIZE n" / ERROR / WARNING lines), and
 * every disk that was waiting on this reply is moved to pestq (P_PREP),
 * estq or failq.  On a parse or transport error all active disks of the
 * host are failed with the collected error text and the host is marked
 * HOST_DONE.
 *
 * NOTE(review): this listing has lost lines (two parameter lines, several
 * declarations, labels, braces and else branches); the comments describe
 * only what the surviving lines show.
 */
1766 static void handle_result(
1769 security_handle_t *sech)
1775 char *msg, msg_undo;
1776 char *remoterr, *errbuf = NULL;
1787 hostp = (am_host_t *)datap;
1788 hostp->up = HOST_READY;
1791 errbuf = vstrallocf(_("Request to %s failed: %s"),
1792 hostp->hostname, security_geterror(sech));
/* A NAK body must read "ERROR <text>"; anything else is a parse failure.
 * The two "noop is unknown" texts are tolerated and produce no error. */
1795 if (pkt->type == P_NAK) {
1797 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1800 goto NAK_parse_failed;
1802 skip_whitespace(s, ch);
1803 if(ch == '\0') goto NAK_parse_failed;
1805 if((s = strchr(remoterr, '\n')) != NULL) {
1806 if(s == remoterr) goto NAK_parse_failed;
1809 if (strcmp(remoterr, "unknown service: noop") != 0
1810 && strcmp(remoterr, "noop: invalid service") != 0) {
1811 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
/* OPTIONS line: pick out "features=" (only when it starts a field, i.e.
 * follows whitespace or ';') and replace the host's feature set with it. */
1822 if(strncmp_const(line, "OPTIONS ") == 0) {
1823 t = strstr(line, "features=");
1824 if(t != NULL && (g_ascii_isspace((int)t[-1]) || t[-1] == ';')) {
1825 char *u = strchr(t, ';');
1828 t += SIZEOF("features=")-1;
1829 am_release_feature_set(hostp->features);
1830 if((hostp->features = am_string_to_feature(t)) == NULL) {
/* vstrallocf() takes the format string first; the host name is an
 * argument, never the format. */
1831 errbuf = vstrallocf(_("%s: bad features value: %s\n"),
1832 hostp->hostname, line);
1838 skip_quoted_line(s, ch);
1843 if ((strncmp_const_skip(t, "ERROR ", t, tch) == 0) ||
1844 (strncmp_const_skip(t, "WARNING ", t, tch) == 0)) {
1846 skip_whitespace(t, tch);
1852 * If the "error" is that the "noop" service is unknown, it
1853 * just means the client is "old" (does not support the service).
1854 * We can ignore this.
1856 if(hostp->features == NULL
1857 && pkt->type == P_NAK
1858 && (strcmp(t - 1, "unknown service: noop") == 0
1859 || strcmp(t - 1, "noop: invalid service") == 0)) {
1860 skip_quoted_line(s, ch);
1864 if (t) /* truncate after the first line */
1866 errbuf = vstralloc(hostp->hostname,
1867 (pkt->type == P_NAK) ? "NAK " : "",
/* Per-disk line: quoted disk name, then the level, then the payload. */
1876 skip_quoted_string(t, tch);
1878 disk = unquote_string(msg);
1880 skip_whitespace(t, tch);
1882 if (sscanf(t - 1, "%d", &level) != 1) {
1886 skip_integer(t, tch);
1887 skip_whitespace(t, tch);
/* The named disk has to be one of this host's disks. */
1889 dp = lookup_hostdisk(hostp, disk);
1892 log_add(L_ERROR, _("%s: invalid reply from sendsize: `%s'\n"),
1893 hostp->hostname, line);
1898 if (strncmp_const(t-1,"SIZE ") == 0) {
1899 if (sscanf(t - 1, "SIZE %lld", &size_) != 1) {
1902 size = (gint64)size_;
1903 } else if ((strncmp_const(t-1,"ERROR ") == 0) ||
1904 (strncmp_const(t-1,"WARNING ") == 0)) {
1905 skip_non_whitespace(t, tch);
1906 skip_whitespace(t, tch);
1908 skip_quoted_string(t,tch);
/* Only the first message of a final reply is kept as the disk's errstr. */
1911 if (pkt->type == P_REP && !est(dp)->errstr) {
1912 est(dp)->errstr = unquote_string(msg);
/* Store the result in the slot of the level it was requested for.  A size
 * of -2 clears the estimate (stored as -1); a size above -1 is recorded.
 * A level that was never requested makes the whole reply a bad message. */
1921 if (dp->estimate == ES_SERVER) {
1922 if (size == (gint64)-2) {
1923 for(i = 0; i < MAX_LEVELS; i++) {
1924 if (est(dp)->level[i] == level) {
1925 est(dp)->est_size[i] = -1; /* remove estimate */
1929 if(i == MAX_LEVELS) {
1930 goto bad_msg; /* this est wasn't requested */
1934 est(dp)->got_estimate++;
1935 } else if (size > (gint64)-1) {
1936 for(i = 0; i < MAX_LEVELS; i++) {
1937 if(est(dp)->level[i] == level) {
1938 est(dp)->est_size[i] = size;
1942 if(i == MAX_LEVELS) {
1943 goto bad_msg; /* this est wasn't requested */
1945 est(dp)->got_estimate++;
1949 skip_quoted_line(s, ch);
1952 if(hostp->up == HOST_READY && hostp->features == NULL) {
1954 * The client does not support the features list, so give it an
1957 dbprintf(_("no feature set from host %s\n"), hostp->hostname);
1958 hostp->features = am_set_default_feature_set();
1961 security_close_connection(sech, hostp->hostname);
1963 /* XXX what about disks that only got some estimates... do we care? */
1964 /* XXX amanda 2.1 treated that case as a bad msg */
/* Settle every selected disk of this host that was waiting on the reply:
 * take it off the queue it was parked on, then requeue it by outcome. */
1966 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1967 if(dp->todo == 0) continue;
1968 if(est(dp)->state != DISK_ACTIVE &&
1969 est(dp)->state != DISK_PARTIALY_DONE) continue;
1971 if(est(dp)->state == DISK_ACTIVE) {
1972 remove_disk(&waitq, dp);
1974 else if(est(dp)->state == DISK_PARTIALY_DONE) {
1975 remove_disk(&pestq, dp);
1978 if(pkt->type == P_REP) {
1979 est(dp)->state = DISK_DONE;
1981 else if(pkt->type == P_PREP) {
1982 est(dp)->state = DISK_PARTIALY_DONE;
1985 if(est(dp)->level[0] == -1) continue; /* ignore this disk */
1988 qname = quote_string(dp->name);
/* A partial reply parks the disk on pestq until more arrives. */
1989 if(pkt->type == P_PREP) {
1990 g_fprintf(stderr,_("%s: time %s: got partial result for host %s disk %s:"),
1991 get_pname(), walltime_str(curclock()),
1992 dp->host->hostname, qname);
1993 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1994 est(dp)->level[0], (long long)est(dp)->est_size[0],
1995 est(dp)->level[1], (long long)est(dp)->est_size[1],
1996 est(dp)->level[2], (long long)est(dp)->est_size[2]);
1997 enqueue_disk(&pestq, dp);
1999 else if(pkt->type == P_REP) {
2000 g_fprintf(stderr,_("%s: time %s: got result for host %s disk %s:"),
2001 get_pname(), walltime_str(curclock()),
2002 dp->host->hostname, qname);
2003 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
2004 est(dp)->level[0], (long long)est(dp)->est_size[0],
2005 est(dp)->level[1], (long long)est(dp)->est_size[1],
2006 est(dp)->level[2], (long long)est(dp)->est_size[2]);
/* Final reply: one positive estimate is enough to plan the disk; levels
 * that came back negative are logged and cleared before it goes to estq. */
2007 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (gint64)0) ||
2008 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (gint64)0) ||
2009 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (gint64)0)) {
2011 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (gint64)0) {
2013 _("disk %s:%s, estimate of level %d failed."),
2014 dp->host->hostname, qname, est(dp)->level[2]);
2015 est(dp)->level[2] = -1;
2017 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (gint64)0) {
2019 _("disk %s:%s, estimate of level %d failed."),
2020 dp->host->hostname, qname,
2022 est(dp)->level[1] = -1;
2024 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (gint64)0) {
2026 _("disk %s:%s, estimate of level %d failed."),
2027 dp->host->hostname, qname, est(dp)->level[0]);
2028 est(dp)->level[0] = -1;
2030 enqueue_disk(&estq, dp);
2033 enqueue_disk(&failq, dp);
2034 if(est(dp)->got_estimate) {
2035 est(dp)->errstr = vstrallocf("disk %s, all estimate failed",
2040 _("error result for host %s disk %s: missing estimate\n"),
2041 dp->host->hostname, qname);
2042 if (est(dp)->errstr == NULL) {
2043 est(dp)->errstr = vstrallocf(_("missing result for %s in %s response"),
2044 qname, dp->host->hostname);
2048 hostp->up = HOST_DONE;
/* The post-DLE script runs once per disk (post_dle latches it): on a final
 * reply, or as soon as every requested level has a positive size. */
2050 if (est(dp)->post_dle == 0 &&
2051 (pkt->type == P_REP ||
2052 ((est(dp)->level[0] == -1 || est(dp)->est_size[0] > (gint64)0) &&
2053 (est(dp)->level[1] == -1 || est(dp)->est_size[1] > (gint64)0) &&
2054 (est(dp)->level[2] == -1 || est(dp)->est_size[2] > (gint64)0)))) {
2055 run_server_scripts(EXECUTE_ON_POST_DLE_ESTIMATE,
2056 get_config_name(), dp, est(dp)->level[0]);
2057 est(dp)->post_dle = 1;
/* Post-host scripts run only after a final reply for a finished host. */
2062 if(hostp->up == HOST_DONE) {
2063 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2065 if (pkt->type == P_REP) {
2066 run_server_scripts(EXECUTE_ON_POST_HOST_ESTIMATE,
2067 get_config_name(), dp, est(dp)->level[0]);
2073 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2075 while(waitpid(-1, NULL, WNOHANG)> 0);
2080 errbuf = vstrallocf(_("%s NAK: [NAK parse failed]"), hostp->hostname);
2081 g_fprintf(stderr, _("got strange nak from %s:\n----\n%s----\n\n"),
2082 hostp->hostname, pkt->body);
2086 g_fprintf(stderr,_("got a bad message, stopped at:\n"));
2088 g_fprintf(stderr,_("----\n%s----\n\n"), line);
2089 errbuf = stralloc2(_("badly formatted response from "), hostp->hostname);
2094 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2096 if(est(dp)->state == DISK_ACTIVE) {
2097 qname = quote_string(dp->name);
2098 est(dp)->state = DISK_DONE;
2099 remove_disk(&waitq, dp);
2100 enqueue_disk(&failq, dp);
2103 est(dp)->errstr = stralloc(errbuf);
2104 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
2105 dp->host->hostname, qname, errbuf);
2112 * If there were no disks involved, make sure the error gets
2115 log_add(L_ERROR, "%s", errbuf);
2117 hostp->up = HOST_DONE;
2119 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2121 while(waitpid(-1, NULL, WNOHANG)> 0);
2128 * ========================================================================
/* Comparison callback handed to insert_disk() to keep schedq ordered. */
2133 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
/* Chooses the incremental level to run for dp. */
2134 static int pick_inclevel(disk_t *dp);
/*
 * analyze_estimate - decide tonight's dump level for one disk and schedule it.
 *
 * The level 0 estimate is used when the disk is due (next_level0 <= 0) or
 * when its last level was 0 and FORCE_NO_BUMP is set in the info record;
 * otherwise an incremental level from pick_inclevel() is used.  For a full
 * dump a degraded-mode incremental fallback is filled in where possible.
 * The disk is then inserted into schedq and the running totals
 * (total_size, total_lev0, balanced_size) are updated.  A pending errstr
 * is logged as L_FAIL even though the estimate itself succeeded.
 *
 * NOTE(review): this listing has lost lines (the parameter line, local
 * declarations, else branches and some assignments of dump_level); the
 * comments describe only what the surviving lines show.
 */
2136 static void analyze_estimate(
2142 char *qname = quote_string(dp->name);
2146 g_fprintf(stderr, _("pondering %s:%s... "),
2147 dp->host->hostname, qname);
2148 g_fprintf(stderr, _("next_level0 %d last_level %d "),
2149 ep->next_level0, ep->last_level);
2151 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
/* Start out with no degraded-mode fallback. */
2155 ep->degr_level = -1;
2156 ep->degr_nsize = (gint64)-1;
2157 ep->degr_csize = (gint64)-1;
2159 if(ep->next_level0 <= 0 || (have_info && ep->last_level == 0
2160 && (info.command & FORCE_NO_BUMP))) {
2161 if(ep->next_level0 <= 0) {
2162 g_fprintf(stderr,_("(due for level 0) "));
2165 ep->dump_nsize = est_size(dp, 0);
2166 ep->dump_csize = est_tape_size(dp, 0);
/* Without a level 0 estimate an incremental is planned instead; if the
 * picked level has no estimate either, the next level up is tried. */
2167 if(ep->dump_csize <= (gint64)0) {
2169 _("(no estimate for level 0, picking an incr level)\n"));
2170 ep->dump_level = pick_inclevel(dp);
2171 ep->dump_nsize = est_size(dp, ep->dump_level);
2172 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2174 if(ep->dump_nsize == (gint64)-1) {
2175 ep->dump_level = ep->dump_level + 1;
2176 ep->dump_nsize = est_size(dp, ep->dump_level);
2177 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2181 total_lev0 += (double) ep->dump_csize;
/* New disks (last_level == -1) and skip-incr disks have no incremental
 * to fall back to. */
2182 if(ep->last_level == -1 || dp->skip_incr) {
2183 g_fprintf(stderr,_("(%s disk, can't switch to degraded mode)\n"),
2184 dp->skip_incr? "skip-incr":_("new"));
2185 if (dp->skip_incr && ep->degr_mesg == NULL) {
2186 ep->degr_mesg = _("Can't switch to degraded mode when using a skip-incr disk");
2188 ep->degr_level = -1;
2189 ep->degr_nsize = (gint64)-1;
2190 ep->degr_csize = (gint64)-1;
2193 /* fill in degraded mode info */
2194 g_fprintf(stderr,_("(picking inclevel for degraded mode)"));
2195 ep->degr_level = pick_inclevel(dp);
2196 ep->degr_nsize = est_size(dp, ep->degr_level);
2197 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2198 if(ep->degr_csize == (gint64)-1) {
2199 ep->degr_level = ep->degr_level + 1;
2200 ep->degr_nsize = est_size(dp, ep->degr_level);
2201 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2203 if(ep->degr_csize == (gint64)-1) {
2204 g_fprintf(stderr,_("(no inc estimate)"));
2205 if (ep->degr_mesg == NULL)
2206 ep->degr_mesg = _("Can't switch to degraded mode because an incremental estimate could not be performed");
2207 ep->degr_level = -1;
2209 g_fprintf(stderr,"\n");
2214 g_fprintf(stderr,_("(not due for a full dump, picking an incr level)\n"));
2215 /* XXX - if this returns -1 may be we should force a total? */
2216 ep->dump_level = pick_inclevel(dp);
2217 ep->dump_nsize = est_size(dp, ep->dump_level);
2218 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* Fallback chain while no estimate is found: the last level, then the
 * last level + 1, then one more candidate whose level assignment is on a
 * line missing from this listing. */
2220 if(ep->dump_csize == (gint64)-1) {
2221 ep->dump_level = ep->last_level;
2222 ep->dump_nsize = est_size(dp, ep->dump_level);
2223 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2225 if(ep->dump_csize == (gint64)-1) {
2226 ep->dump_level = ep->last_level + 1;
2227 ep->dump_nsize = est_size(dp, ep->dump_level);
2228 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2230 if(ep->dump_csize == (gint64)-1) {
2232 ep->dump_nsize = est_size(dp, ep->dump_level);
2233 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2235 if (ep->degr_mesg == NULL) {
2236 ep->degr_mesg = _("Can't switch to degraded mode because a full is not planned");
2240 g_fprintf(stderr,_(" curr level %d nsize %lld csize %lld "),
2241 ep->dump_level, (long long)ep->dump_nsize,
2242 (long long)ep->dump_csize);
2244 insert_disk(&schedq, dp, schedule_order);
/* Besides its data, each scheduled dump is charged one tape block and one
 * tape mark. */
2246 total_size += (gint64)tt_blocksize_kb + ep->dump_csize + tape_mark;
2248 /* update the balanced size */
2249 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2250 dp->strategy == DS_INCRONLY)) {
/* With no level 0 estimate the size of the last level 0 is used. */
2253 lev0size = est_tape_size(dp, 0);
2254 if(lev0size == (gint64)-1) lev0size = ep->last_lev0size;
2256 balanced_size += (double)(lev0size / (gint64)runs_per_cycle);
2259 g_fprintf(stderr,_("total size %lld total_lev0 %1.0lf balanced-lev0size %1.0lf\n"),
2260 (long long)total_size, total_lev0, balanced_size);
2262 /* Log errstr even if the estimate succeeded */
2263 /* It can be an error from a script */
2264 if (est(dp)->errstr) {
2265 char *qerrstr = quote_string(est(dp)->errstr);
2266 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2267 planner_timestamp, qerrstr);
/*
 * handle_failed - report a disk whose estimate failed.
 *
 * The disk's errstr (or a placeholder when none was recorded) is wrapped
 * in square brackets, quoted, and written both to stderr as a FAILED line
 * and to the log as an L_FAIL entry.
 *
 * NOTE(review): the parameter line and any cleanup of the temporary
 * strings are not in this listing.
 */
2274 static void handle_failed(
2277 char *errstr, *errstr1, *qerrstr;
2278 char *qname = quote_string(dp->name);
2280 errstr = est(dp)->errstr? est(dp)->errstr : _("hmm, no error indicator!");
2281 errstr1 = vstralloc("[",errstr,"]", NULL);
2282 qerrstr = quote_string(errstr1);
2285 g_fprintf(stderr, _("%s: FAILED %s %s %s 0 %s\n"),
2286 get_pname(), dp->host->hostname, qname, planner_timestamp, qerrstr);
2288 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2289 planner_timestamp, qerrstr);
2293 /* XXX - memory leak with *dp */
2298 * insert-sort by decreasing priority, then
2299 * by decreasing size within priority levels.
/*
 * schedule_order - ordering callback used when inserting into schedq.
 *
 * Disks are compared by dump_priority first (b minus a, so a non-zero
 * difference is returned as is), then by dump_csize, where the gint64
 * difference is reduced to -1 or 1 so it fits the int return type.
 *
 * NOTE(review): the parameter lines, local declarations and the return
 * for equal sizes are not in this listing.
 */
2302 static int schedule_order(
2309 diff = est(b)->dump_priority - est(a)->dump_priority;
2310 if(diff != 0) return diff;
2312 ldiff = est(b)->dump_csize - est(a)->dump_csize;
2313 if(ldiff < (gint64)0) return -1; /* XXX - there has to be a better way to do this */
2314 if(ldiff > (gint64)0) return 1;
/*
 * pick_inclevel - choose the incremental level to run for a disk.
 *
 * The starting point is the disk's last level.  A last level of 0 and the
 * DS_NOFULL strategy both lead to level 1 (per the messages below).  With
 * no estimate for the base level, base_level + 1 is returned if that level
 * has a positive estimate.  Otherwise the base level is kept unless the
 * bump conditions hold: enough days at this level, a base size above the
 * bump threshold, and a saving of at least the threshold at the next level.
 * A bump is reported on stderr and logged as L_INFO.
 *
 * NOTE(review): several return statements and the first part of the
 * "don't bump" condition are on lines missing from this listing.
 */
2319 static int pick_inclevel(
2322 int base_level, bump_level;
2323 gint64 base_size, bump_size;
2327 base_level = est(dp)->last_level;
2329 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2330 if(base_level == 0) {
2331 g_fprintf(stderr,_(" picklev: last night 0, so tonight level 1\n"));
2335 /* if no-full option set, always do level 1 */
2336 if(dp->strategy == DS_NOFULL) {
2337 g_fprintf(stderr,_(" picklev: no-full set, so always level 1\n"));
2341 base_size = est_size(dp, base_level);
2343 /* if we didn't get an estimate, we can't do an inc */
2344 if(base_size == (gint64)-1) {
2345 base_size = est_size(dp, base_level+1);
2346 if(base_size > (gint64)0) /* FORCE_BUMP */
2347 return base_level+1;
2348 g_fprintf(stderr,_(" picklev: no estimate for level %d, so no incs\n"), base_level);
/* The threshold is computed from the level 0 estimate and the disk's
 * bumppercent, bumpsize and bumpmult settings. */
2352 thresh = bump_thresh(base_level, est_size(dp, 0), dp->bumppercent, dp->bumpsize, dp->bumpmult);
2355 _(" pick: size %lld level %d days %d (thresh %lldK, %d days)\n"),
2356 (long long)base_size, base_level, est(dp)->level_days,
2357 (long long)thresh, dp->bumpdays);
2360 || est(dp)->level_days < dp->bumpdays
2361 || base_size <= thresh)
2364 bump_level = base_level + 1;
2365 bump_size = est_size(dp, bump_level);
/* No estimate for the next level: stay where we are. */
2367 if(bump_size == (gint64)-1) return base_level;
2369 g_fprintf(stderr, _(" pick: next size %lld... "),
2370 (long long)bump_size);
/* Bumping has to save at least the threshold to be worth it. */
2372 if(base_size - bump_size < thresh) {
2373 g_fprintf(stderr, _("not bumped\n"));
2377 qname = quote_string(dp->name);
2378 g_fprintf(stderr, _("BUMPED\n"));
2379 log_add(L_INFO, _("Incremental of %s:%s bumped to level %d."),
2380 dp->host->hostname, qname, bump_level);
2390 ** ========================================================================
2393 ** We have two strategies here:
2397 ** If we are trying to fit too much on the tape something has to go. We
2398 ** try to delay totals until tomorrow by converting them into incrementals
2399 ** and, if that is not effective enough, dropping incrementals altogether.
2400 ** While we are searching for the guilty dump (the one that is really
2401 ** causing the schedule to be oversize) we have probably trampled on a lot of
2402 ** innocent dumps, so we maintain a "before image" list and use this to
2403 ** put back what we can.
2405 ** 2. Promote dumps.
2407 ** We try to keep the amount of tape used by total dumps the same each night.
2408 ** If there is some spare tape in this run we have a look to see if any of
2409 ** tonight's incrementals could be promoted to totals and leave us with a
2410 ** more balanced cycle.
/* Called from delay_dumps() with a delete flag and a NULL-terminated list
 * of message strings as the variadic tail. */
2413 static void delay_one_dump(disk_t *dp, int delete, ...);
/* NOTE(review): presumably the helpers for the "Promote dumps" strategy
 * described above; their bodies are not in this part of the file. */
2414 static int promote_highest_priority_incremental(void);
2415 static int promote_hills(void);
2417 /* delay any dumps that will not fit */
2418 static void delay_dumps(void)
2425 gint64 new_total; /* New total_size */
2426 char est_kb[20]; /* Text formatted dump size */
2427 int nb_forced_level_0;
2433 biq.head = biq.tail = NULL;
2436 ** 1. Delay dumps that are way oversize.
2438 ** Dumps larger than the size of the tapes we are using are just plain
2439 ** not going to fit no matter how many other dumps we drop. Delay
2440 ** oversize totals until tomorrow (by which time my owner will have
2441 ** resolved the problem!) and drop incrementals altogether. Naturally
2442 ** a large total might be delayed into a large incremental so these
2443 ** need to be checked for separately.
2446 for(dp = schedq.head; dp != NULL; dp = ndp) {
2447 int avail_tapes = 1;
2448 if (dp->tape_splitsize > (gint64)0)
2449 avail_tapes = conf_runtapes;
2451 ndp = dp->next; /* remove_disk zaps this */
2453 full_size = est_tape_size(dp, 0);
2454 if (full_size > tapetype_get_length(tape) * (gint64)avail_tapes) {
2455 char *qname = quote_string(dp->name);
2456 if (conf_runtapes > 1 && dp->tape_splitsize == (gint64)0) {
2457 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"
2458 ", you could define a splitsize"),
2459 dp->host->hostname, qname,
2460 (long long)full_size);
2462 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"),
2463 dp->host->hostname, qname,
2464 (long long)full_size);
2469 if (est(dp)->dump_csize == (gint64)-1 ||
2470 est(dp)->dump_csize <= tapetype_get_length(tape) * (gint64)avail_tapes) {
2474 /* Format dumpsize for messages */
2475 g_snprintf(est_kb, 20, "%lld KB,",
2476 (long long)est(dp)->dump_csize);
2478 if(est(dp)->dump_level == 0) {
2481 message = _("but cannot incremental dump skip-incr disk");
2483 else if(est(dp)->last_level < 0) {
2485 message = _("but cannot incremental dump new disk");
2487 else if(est(dp)->degr_level < 0) {
2489 message = _("but no incremental estimate");
2491 else if (est(dp)->degr_csize > tapetype_get_length(tape)) {
2493 message = _("incremental dump also larger than tape");
2497 message = _("full dump delayed");
2502 message = _("skipping incremental");
2504 delay_one_dump(dp, delete, _("dump larger than available tape space,"),
2505 est_kb, message, NULL);
2509 ** 2. Delay total dumps.
2511 ** Delay total dumps until tomorrow (or the day after!). We start with
2512 ** the lowest priority (most dispensable) and work forwards. We take
2513 ** care not to delay *all* the dumps since this could lead to a
2514 ** stalemate [for any one disk there are only three ways tomorrow's dump will
2515 ** be smaller than today's: 1. we do a level 0 today so tomorrow's dump
2516 ** will be a level 1; 2. the disk gets more data so that it is bumped
2517 ** tomorrow (this can be a slow process); and, 3. the disk loses some
2518 ** data (when does that ever happen?)].
2521 nb_forced_level_0 = 0;
2523 for(dp = schedq.head; dp != NULL && preserve == NULL; dp = dp->next)
2524 if(est(dp)->dump_level == 0)
2527 /* 2.a. Do not delay forced full */
2528 for(dp = schedq.tail;
2529 dp != NULL && total_size > tape_length;
2533 if(est(dp)->dump_level != 0) continue;
2535 get_info(dp->host->hostname, dp->name, &info);
2536 if(info.command & FORCE_FULL) {
2537 nb_forced_level_0 += 1;
2542 if(dp != preserve) {
2544 /* Format dumpsize for messages */
2545 g_snprintf(est_kb, 20, "%lld KB,",
2546 (long long)est(dp)->dump_csize);
2550 message = _("but cannot incremental dump skip-incr disk");
2552 else if(est(dp)->last_level < 0) {
2554 message = _("but cannot incremental dump new disk");
2556 else if(est(dp)->degr_level < 0) {
2558 message = _("but no incremental estimate");
2562 message = _("full dump delayed");
2564 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2569 /* 2.b. Delay forced full if needed */
2570 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2571 for(dp = schedq.tail;
2572 dp != NULL && total_size > tape_length;
2576 if(est(dp)->dump_level == 0 && dp != preserve) {
2578 /* Format dumpsize for messages */
2579 g_snprintf(est_kb, 20, "%lld KB,",
2580 (long long)est(dp)->dump_csize);
2584 message = _("but cannot incremental dump skip-incr disk");
2586 else if(est(dp)->last_level < 0) {
2588 message = _("but cannot incremental dump new disk");
2590 else if(est(dp)->degr_level < 0) {
2592 message = _("but no incremental estimate");
2596 message = _("full dump delayed");
2598 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2605 ** 3. Delay incremental dumps.
2607 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2608 ** at making things fit. Again, we start with the lowest priority (most
2609 ** dispensable) and work forwards.
2612 for(dp = schedq.tail;
2613 dp != NULL && total_size > tape_length;
2617 if(est(dp)->dump_level != 0) {
2619 /* Format dumpsize for messages */
2620 g_snprintf(est_kb, 20, "%lld KB,",
2621 (long long)est(dp)->dump_csize);
2623 delay_one_dump(dp, 1,
2624 _("dumps way too big,"),
2626 _("must skip incremental dumps"),
2632 ** 4. Reinstate delayed dumps.
2634 ** We might not have needed to stomp on all of the dumps we have just
2635 ** delayed above. Try to reinstate them all starting with the last one
2636 ** and working forwards. It is unlikely that the last one will fit back
2637 ** in but why complicate the code?
2640 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2641 int avail_tapes = 1;
2644 if(dp->tape_splitsize > (gint64)0)
2645 avail_tapes = conf_runtapes;
2648 new_total = total_size + (gint64)tt_blocksize_kb +
2649 bi->csize + (gint64)tape_mark;
2651 new_total = total_size - est(dp)->dump_csize + bi->csize;
2653 if((new_total <= tape_length) &&
2654 (bi->csize < (tapetype_get_length(tape) * (gint64)avail_tapes))) {
2656 total_size = new_total;
2658 if(bi->level == 0) {
2659 total_lev0 += (double) bi->csize;
2661 insert_disk(&schedq, dp, schedule_order);
2664 est(dp)->dump_level = bi->level;
2665 est(dp)->dump_nsize = bi->nsize;
2666 est(dp)->dump_csize = bi->csize;
2670 if(bi->next == NULL)
2671 biq.tail = bi->prev;
2673 (bi->next)->prev = bi->prev;
2674 if(bi->prev == NULL)
2675 biq.head = bi->next;
2677 (bi->prev)->next = bi->next;
2685 ** 5. Output messages about what we have done.
2687 ** We can't output messages while we are delaying dumps because we might
2688 ** reinstate them later. We remember all the messages and output them
2692 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2695 g_fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2696 log_add(L_FAIL, "%s", bi->errstr);
2700 g_fprintf(stderr, _(" delay: %s now at level %d\n"),
2701 bi->errstr, est(dp)->dump_level);
2702 log_add(L_INFO, "%s", bi->errstr);
2710 g_fprintf(stderr, _(" delay: Total size now %lld.\n"),
2711 (long long)total_size);
2718 * Remove a dump or modify it from full to incremental.
2719 * Keep track of it on the bi q in case we can add it back later.
/*
 * delay_one_dump: take the dump currently planned for dp out of the
 * running totals and record it in a new bi_t linked at the tail of biq.
 *
 * The record keeps the delete flag, the planned level and both planned
 * sizes, plus a message string that starts with the host name and
 * contains the planner timestamp (or a question mark when it is NULL).
 * The string arguments that follow delete are read until a NULL is
 * found and are appended to that message.
 *
 * NOTE(review): the statements selecting between remove_disk() and the
 * degr_* fallback near the end are only partly visible here; presumably
 * the delete flag chooses between them -- confirm against the full file.
 */
2722 static void delay_one_dump,
2728 char level_str[NUM_STR_SIZE];
2731 char *qname = quote_string(dp->name);
2732 char *errstr, *qerrstr;
2734 arglist_start(argp, delete);
/* Stop counting this dump: its size plus tt_blocksize_kb and tape_mark. */
2736 total_size -= (gint64)tt_blocksize_kb + est(dp)->dump_csize + (gint64)tape_mark;
/* A level 0 dump was also counted in total_lev0. */
2737 if(est(dp)->dump_level == 0) {
2738 total_lev0 -= (double) est(dp)->dump_csize;
/* New record, linked after the current tail of biq. */
2741 bi = alloc(SIZEOF(bi_t));
2743 bi->prev = biq.tail;
2744 if(biq.tail == NULL)
2747 biq.tail->next = bi;
/* Remember what was planned, in case it can be added back later. */
2750 bi->deleted = delete;
2752 bi->level = est(dp)->dump_level;
2753 bi->nsize = est(dp)->dump_nsize;
2754 bi->csize = est(dp)->dump_csize;
/* Start of the message: host name, timestamp, and further fields. */
2756 g_snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_level);
2757 bi->errstr = vstralloc(dp->host->hostname,
2759 " ", planner_timestamp ? planner_timestamp : "?",
/* Append each remaining variadic string to errstr, preceded by sep. */
2764 while ((next = arglist_val(argp, char *)) != NULL) {
2765 vstrextend(&errstr, sep, next, NULL);
/* Close the bracket, quote the text and attach it to the record message. */
2768 strappend(errstr, "]");
2769 qerrstr = quote_string(errstr);
2770 vstrextend(&bi->errstr, " ", qerrstr, NULL);
/* NOTE(review): presumably reached only when delete is non-zero. */
2776 remove_disk(&schedq, dp);
/* Switch the plan to the degraded estimate and count its size again. */
2778 est(dp)->dump_level = est(dp)->degr_level;
2779 est(dp)->dump_nsize = est(dp)->degr_nsize;
2780 est(dp)->dump_csize = est(dp)->degr_csize;
2781 total_size += (gint64)tt_blocksize_kb + est(dp)->dump_csize + (gint64)tape_mark;
/*
 * promote_highest_priority_incremental: give every disk in schedq a
 * promote score, then turn the plan of the selected disk into a level 0
 * dump and update total_size and total_lev0 accordingly.
 *
 * Takes no arguments; works on the file-level schedule state (schedq,
 * total_size, total_lev0, tape_length, balanced_size, balance_threshold,
 * conf_dumpcycle).
 *
 * NOTE(review): the statements guarded by the filter tests, the counter
 * updates, the assignment of dp_promote and the return statements are
 * not visible here. According to the note below the declarations the
 * function returns 1 when it promoted a disk -- confirm the other value.
 */
2788 static int promote_highest_priority_incremental(void)
2790 disk_t *dp, *dp1, *dp_promote;
2791 gint64 new_size, new_total, new_lev0;
2793 int nb_today, nb_same_day, nb_today2;
2794 int nb_disk_today, nb_disk_same_day;
2798 * return 1 if did so; must update total_size correctly; must not
2799 * cause total_size to exceed tape_length
/* Pass 1: score each scheduled disk. */
2803 for(dp = schedq.head; dp != NULL; dp = dp->next) {
/* Every disk starts at -1000 before any of the checks below. */
2805 est(dp)->promote = -1000;
/*
 * Tests on the level 0 estimate, on next_level0 and on maxpromoteday.
 * NOTE(review): presumably each one skips the disk when true.
 */
2807 if(est_size(dp,0) <= (gint64)0)
2810 if(est(dp)->next_level0 <= 0)
2813 if(est(dp)->next_level0 > dp->maxpromoteday)
/* Totals as they would be with this disk dumped at level 0 instead. */
2816 new_size = est_tape_size(dp, 0);
2817 new_total = total_size - est(dp)->dump_csize + new_size;
2818 new_lev0 = (gint64)total_lev0 + new_size;
/*
 * Compare every scheduled disk with dp: is it at level 0 today, or does
 * it share the next_level0 of dp?  The same two tests are repeated for
 * disks on the same host as dp.
 * NOTE(review): presumably these tests feed the nb_* counters.
 */
2823 nb_disk_same_day = 0;
2824 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2825 if(est(dp1)->dump_level == 0)
2827 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2829 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2830 if(est(dp1)->dump_level == 0)
2832 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2837 /* do not promote if overflow tape */
2838 if(new_total > tape_length)
2841 /* do not promote if overflow balanced size and something today */
2842 /* promote if nothing today */
2843 if((new_lev0 > (gint64)(balanced_size + balance_threshold)) &&
2844 (nb_disk_today > 0))
2847 /* do not promote if only one disk due that day and nothing today */
2848 if(nb_disk_same_day == 1 && nb_disk_today == 0)
/*
 * Score from the per-host counters.  Both formulas add
 * conf_dumpcycle - next_level0, so a smaller next_level0 scores higher.
 */
2851 nb_today2 = nb_today*nb_today;
2852 if(nb_today == 0 && nb_same_day > 1)
2855 if(nb_same_day >= nb_today2) {
2856 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2857 conf_dumpcycle - est(dp)->next_level0;
2860 est(dp)->promote = -nb_today2 +
2861 conf_dumpcycle - est(dp)->next_level0;
/* Report on stderr whether this disk beats the best candidate so far. */
2864 qname = quote_string(dp->name);
2865 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2867 g_fprintf(stderr," try %s:%s %d %d %d = %d\n",
2868 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2871 g_fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2872 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
/*
 * Pass 2: promote one disk.
 * NOTE(review): presumably dp is the selected dp_promote at this point.
 */
2880 qname = quote_string(dp->name);
2881 new_size = est_tape_size(dp, 0);
2882 new_total = total_size - est(dp)->dump_csize + new_size;
2883 new_lev0 = (gint64)total_lev0 + new_size;
/* Commit the new totals; check_days keeps next_level0 for the messages. */
2885 total_size = new_total;
2886 total_lev0 = (double)new_lev0;
2887 check_days = est(dp)->next_level0;
/* The current plan becomes the degraded fallback ... */
2888 est(dp)->degr_level = est(dp)->dump_level;
2889 est(dp)->degr_nsize = est(dp)->dump_nsize;
2890 est(dp)->degr_csize = est(dp)->dump_csize;
/* ... and the disk is now planned at level 0. */
2891 est(dp)->dump_level = 0;
2892 est(dp)->dump_nsize = est_size(dp, 0);
2893 est(dp)->dump_csize = new_size;
2894 est(dp)->next_level0 = 0;
2897 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2898 dp->host->hostname, qname,
2899 total_lev0, (long long)total_size);
2902 plural(_("Full dump of %s:%s promoted from %d day ahead."),
2903 _("Full dump of %s:%s promoted from %d days ahead."),
2905 dp->host->hostname, qname, check_days);
/*
 * promote_hills: look for a future day whose full dumps add up to the
 * largest size (a hill) and try to move one of them to tonight.
 *
 * Steps visible in this function:
 *   - build an array sp[] with one balance_stats entry per day, up to
 *     conf_dumpcycle capped at 10000 entries;
 *   - add last_lev0size of each scheduled disk to sp[next_level0].size,
 *     leaving out skip_full, DS_NOFULL and DS_INCRONLY disks;
 *   - pick the day with the largest size among days with disks > 1;
 *   - promote a disk of that day whose level 0 keeps total_size within
 *     tape_length; if no disk of that day qualifies, clear the disks
 *     count of that day so it is not picked again.
 *
 * NOTE(review): the enclosing loop head, the return statements and the
 * release of sp are not visible here; the return value is presumably
 * non-zero when a disk was promoted -- confirm against the full file.
 */
2913 static int promote_hills(void)
2916 struct balance_stats {
2928 /* If we are already doing a level 0 don't bother */
2932 /* Do the guts of an "amadmin balance" */
/* Cap the number of per-day entries at 10000. */
2933 my_dumpcycle = conf_dumpcycle;
2934 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
2936 sp = (struct balance_stats *)
2937 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
/* Reset the per-day entries. */
2939 for(days = 0; days < my_dumpcycle; days++) {
2941 sp[days].size = (gint64)0;
/* Accumulate the last level 0 size of each eligible disk on its day. */
2944 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2945 days = est(dp)->next_level0; /* This is > 0 by definition */
2946 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
2947 dp->strategy != DS_INCRONLY) {
2949 sp[days].size += est(dp)->last_lev0size;
2953 /* Search for a suitable big hill and cut it down */
2955 /* Find the tallest hill */
2956 hill_size = (gint64)0;
2957 for(days = 0; days < my_dumpcycle; days++) {
/* Only days holding more than one disk are considered. */
2958 if(sp[days].disks > 1 && sp[days].size > hill_size) {
2959 hill_size = sp[days].size;
2964 if(hill_size <= (gint64)0) break; /* no suitable hills */
2966 /* Find all the dumps in that hill and try and remove one */
2967 for(dp = schedq.head; dp != NULL; dp = dp->next) {
/*
 * Tests for disks outside this hill or not allowed to be promoted.
 * NOTE(review): presumably such disks are skipped.
 */
2968 if(est(dp)->next_level0 != hill_days ||
2969 est(dp)->next_level0 > dp->maxpromoteday ||
2971 dp->strategy == DS_NOFULL ||
2972 dp->strategy == DS_INCRONLY)
/* Would a level 0 of this disk still fit within tape_length? */
2974 new_size = est_tape_size(dp, 0);
2975 new_total = total_size - est(dp)->dump_csize + new_size;
2976 if(new_total > tape_length)
2978 /* We found a disk we can promote */
2979 qname = quote_string(dp->name);
2980 total_size = new_total;
2981 total_lev0 += (double)new_size;
/* The current plan becomes the degraded fallback ... */
2982 est(dp)->degr_level = est(dp)->dump_level;
2983 est(dp)->degr_nsize = est(dp)->dump_nsize;
2984 est(dp)->degr_csize = est(dp)->dump_csize;
/* ... and the disk is now planned at level 0. */
2985 est(dp)->dump_level = 0;
2986 est(dp)->next_level0 = 0;
2987 est(dp)->dump_nsize = est_size(dp, 0);
2988 est(dp)->dump_csize = new_size;
2991 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2992 dp->host->hostname, qname,
2993 total_lev0, (long long)total_size);
2996 plural(_("Full dump of %s:%s specially promoted from %d day ahead."),
2997 _("Full dump of %s:%s specially promoted from %d days ahead."),
2999 dp->host->hostname, qname, hill_days);
3005 /* All the disks in that hill were unsuitable. */
3006 sp[hill_days].disks = 0; /* Don't get tricked again */
3014 * ========================================================================
3017 * XXX - memory leak - we shouldn't just throw away *dp
3019 static void output_scheduleline(
3023 time_t dump_time = 0, degr_time = 0;
3024 double dump_kps = 0, degr_kps = 0;
3025 char *schedline = NULL, *degr_str = NULL;
3026 char dump_priority_str[NUM_STR_SIZE];
3027 char dump_level_str[NUM_STR_SIZE];
3028 char dump_nsize_str[NUM_STR_SIZE];
3029 char dump_csize_str[NUM_STR_SIZE];
3030 char dump_time_str[NUM_STR_SIZE];
3031 char dump_kps_str[NUM_STR_SIZE];
3032 char degr_level_str[NUM_STR_SIZE];
3033 char degr_nsize_str[NUM_STR_SIZE];
3034 char degr_csize_str[NUM_STR_SIZE];
3035 char degr_time_str[NUM_STR_SIZE];
3036 char degr_kps_str[NUM_STR_SIZE];
3037 char *dump_date, *degr_date;
3040 char *qname = quote_string(dp->name);
3044 if(ep->dump_csize == (gint64)-1) {
3045 /* no estimate, fail the disk */
3047 _("%s: FAILED %s %s %s %d \"[no estimate]\"\n"),
3049 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
3050 log_add(L_FAIL, _("%s %s %s %d [no estimate]"),
3051 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
3056 dump_date = degr_date = (char *)0;
3057 for(i = 0; i < MAX_LEVELS; i++) {
3058 if(ep->dump_level == ep->level[i])
3059 dump_date = ep->dumpdate[i];
3060 if(ep->degr_level == ep->level[i])
3061 degr_date = ep->dumpdate[i];
3064 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
3066 if(ep->dump_level == 0) {
3067 dump_kps = fix_rate(ep->fullrate);
3068 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
3070 if(ep->degr_csize != (gint64)-1) {
3071 degr_kps = fix_rate(ep->incrrate);
3072 degr_time = (time_t)((double)ep->degr_csize / degr_kps);
3076 dump_kps = fix_rate(ep->incrrate);
3077 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
3080 if(ep->dump_level == 0 && ep->degr_csize != (gint64)-1) {
3081 g_snprintf(degr_level_str, sizeof(degr_level_str),
3082 "%d", ep->degr_level);
3083 g_snprintf(degr_nsize_str, sizeof(degr_nsize_str),
3084 "%lld", (long long)ep->degr_nsize);
3085 g_snprintf(degr_csize_str, sizeof(degr_csize_str),
3086 "%lld", (long long)ep->degr_csize);
3087 g_snprintf(degr_time_str, sizeof(degr_time_str),
3088 "%lld", (long long)degr_time);
3089 g_snprintf(degr_kps_str, sizeof(degr_kps_str),
3091 degr_str = vstralloc(" ", degr_level_str,
3093 " ", degr_nsize_str,
3094 " ", degr_csize_str,
3100 if (ep->degr_mesg) {
3101 degr_mesg = quote_string(ep->degr_mesg);
3103 degr_mesg = quote_string(_("Can't switch to degraded mode for unknown reason"));
3105 degr_str = vstralloc(" ", degr_mesg, NULL);
3108 g_snprintf(dump_priority_str, SIZEOF(dump_priority_str),
3109 "%d", ep->dump_priority);
3110 g_snprintf(dump_level_str, SIZEOF(dump_level_str),
3111 "%d", ep->dump_level);
3112 g_snprintf(dump_nsize_str, sizeof(dump_nsize_str),
3113 "%lld", (long long)ep->dump_nsize);
3114 g_snprintf(dump_csize_str, sizeof(dump_csize_str),
3115 "%lld", (long long)ep->dump_csize);
3116 g_snprintf(dump_time_str, sizeof(dump_time_str),
3117 "%lld", (long long)dump_time);
3118 g_snprintf(dump_kps_str, sizeof(dump_kps_str),
3120 features = am_feature_to_string(dp->host->features);
3121 schedline = vstralloc("DUMP ",dp->host->hostname,
3124 " ", planner_timestamp,
3125 " ", dump_priority_str,
3126 " ", dump_level_str,
3128 " ", dump_nsize_str,
3129 " ", dump_csize_str,
3132 degr_str ? degr_str : "",
3135 fputs(schedline, stdout);
3136 fputs(schedline, stderr);