2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: planner.c 10421 2008-03-06 18:48:30Z martineau $
29 * backup schedule planner for the Amanda backup system.
43 #include "amfeatures.h"
44 #include "server_util.h"
46 #include "timestamp.h"
/*
 * planner_debug(i, x): debugging hook gated on the debug_planner level; the
 * guarded body is only entered when (i) <= debug_planner.
 * NOTE(review): the rest of this macro is not visible in this listing --
 * confirm what it emits before relying on it.
 */
49 #define planner_debug(i,x) do { \
50 if ((i) <= debug_planner) { \
55 #define MAX_LEVELS 3 /* number of estimate slots kept per filesystem (size of est_t.estimate[]) */
57 #define RUNS_REDZONE 5 /* warn when a dump's tape is overwritten within this many runs; should be in conf file? */
59 #define PROMOTE_THRESHOLD 0.05 /* if <5% unbalanced, don't promote */
60 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
62 /* configuration file stuff: values cached from getconf_*() during startup */
65 gint64 conf_maxdumpsize;
68 int conf_runspercycle;
73 int conf_usetimestamps;
75 #define HOST_READY ((void *)0) /* must be 0; compared against a host's up pointer */
76 #define HOST_ACTIVE ((void *)1)
77 #define HOST_DONE ((void *)2)
79 #define DISK_READY 0 /* must be 0; initial est_t state of a disk */
81 #define DISK_PARTIALY_DONE 2
/*
 * One size estimate for a single dump level of a disk.  Besides the members
 * visible here, askfor() also fills in level and dumpdate members.
 */
84 typedef struct one_est_s {
86 gint64 nsize; /* native size; askfor() starts it at -3 */
87 gint64 csize; /* compressed size; values <= -1 are recomputed by est_csize() on lookup */
89 int guessed; /* If server guessed the estimate size */
/*
 * Shared placeholder estimate: est_for_level() returns its address when no
 * slot matches, and a freshly set up est_t points dump_est/degr_est at it.
 */
91 static one_est_t default_one_est = {-1, -1, -1, "INVALID_DATE", 0};
/*
 * Per-disk planner working data.  The estimate setup code allocates one per
 * disk and stores it in dp->up; est(dp) casts it back.
 */
93 typedef struct est_s {
99 one_est_t estimate[MAX_LEVELS]; /* requested levels, filled in by askfor() */
101 gint64 last_lev0size; /* taken from info.inf[0].csize */
106 double fullrate, incrrate; /* perf_average() of the recorded full/incr rates */
107 double fullcomp, incrcomp; /* perf_average() of the recorded full/incr compression ratios */
/* est(dp): the est_t attached to disk dp (stored in dp->up) */
112 #define est(dp) ((est_t *)(dp)->up)
114 /* work queues the disks are moved between while planning; pestq = partial estimate */
115 disklist_t startq, waitq, pestq, estq, failq, schedq;
117 double total_lev0, balanced_size, balance_threshold; /* inputs to the dump-promotion decision */
123 size_t tt_blocksize_kb; /* from tapetype_get_blocksize(); multiplied by 1024 to get tt_blocksize */
124 int runs_per_cycle = 0; /* effective runs per dump cycle, derived from runspercycle at startup */
126 char *planner_timestamp = NULL; /* timestamp or datestamp identifying this run */
128 static am_feature_t *our_features = NULL; /* from am_init_feature_set() */
129 static char *our_feature_string = NULL; /* am_feature_to_string(our_features) */
131 /* We keep a LIFO queue of before images for all modifications made
132 * to schedq in our attempt to make the schedule fit on the tape.
133 * Enough information is stored to reinstate a dump if it turns out
134 * that it shouldn't have been touched after all.
136 typedef struct bi_s {
139 int deleted; /* 0=modified, 1=deleted */
140 disk_t *dp; /* The disk that was changed */
141 int level; /* The original level */
142 gint64 nsize; /* The original native size */
143 gint64 csize; /* The original compressed size */
144 char *errstr; /* A message describing why this disk is here */
/* List type for the before-image records; its members are not visible in this listing. */
147 typedef struct bilist_s {
151 bilist_t biq; /* The BI queue itself */
154 * ========================================================================
159 static void setup_estimate(disk_t *dp);
160 static void get_estimates(void);
161 static void analyze_estimate(disk_t *dp);
162 static void handle_failed(disk_t *dp);
163 static void delay_dumps(void);
164 static int promote_highest_priority_incremental(void);
165 static int promote_hills(void);
166 static void output_scheduleline(disk_t *dp);
167 static void server_estimate(disk_t *dp, int i, info_t *info, int level);
168 int main(int, char **);
/*
 * Body of the planner entry point (presumably the main() declared above; its
 * signature line is not visible in this listing).
 *
 * Visible flow: set the locale, drop root privileges, open debugging and
 * logging, load the configuration, disklist, tapelist and info database,
 * settle on the run timestamp, emit FLUSH lines for files found in holding,
 * and then move the disks through the queues: origq is drained into the
 * estimate setup, estq/failq are drained through analyze_estimate() and
 * handle_failed() onto schedq, the schedule is adjusted in the DELAYING and
 * PROMOTING phases, and every disk left on schedq is written out with
 * output_scheduleline().
 * exit_status starts as EXIT_SUCCESS and is switched to EXIT_FAILURE at
 * several points below.
 */
184 times_t section_start;
188 config_overrides_t *cfg_ovr = NULL;
189 char *cfg_opt = NULL;
191 int exit_status = EXIT_SUCCESS;
194 * Configure program for internationalization:
195 * 1) Only set the message locale for now.
196 * 2) Set textdomain for all amanda related programs to "amanda"
197 * We don't want to be forced to support dozens of message catalogs.
199 setlocale(LC_MESSAGES, "C");
200 textdomain("amanda");
202 /* drop root privileges */
203 planner_setuid = set_root_privs(0);
207 set_pname("planner");
209 dbopen(DBG_SUBDIR_SERVER);
211 cfg_ovr = extract_commandline_config_overrides(&argc, &argv);
215 set_config_overrides(cfg_ovr);
216 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD, cfg_opt);
218 /* conf_diskfile is freed later, as it may be used in an error message */
219 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
220 read_diskfile(conf_diskfile, &origq);
221 disable_skip_disk(&origq);
223 /* Don't die when child closes pipe */
224 signal(SIGPIPE, SIG_IGN);
226 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
228 add_amanda_log_handler(amanda_log_stderr);
229 add_amanda_log_handler(amanda_log_trace_log);
231 if (!planner_setuid) {
232 error(_("planner must be run setuid root"));
235 if (config_errors(NULL) >= CFGERR_ERRORS) {
236 g_critical(_("errors processing config file"));
241 check_running_as(RUNNING_AS_ROOT | RUNNING_AS_UID_ONLY);
243 dbrename(get_config_name(), DBG_SUBDIR_SERVER);
246 section_start = curclock();
248 our_features = am_init_feature_set();
249 our_feature_string = am_feature_to_string(our_features);
251 log_add(L_INFO, "%s pid %ld", get_pname(), (long)getpid());
252 g_fprintf(stderr, _("%s: pid %ld executable %s version %s\n"),
253 get_pname(), (long) getpid(), argv[0], VERSION);
254 for (i = 0; version_info[i] != NULL; i++)
255 g_fprintf(stderr, _("%s: %s"), get_pname(), version_info[i]);
/* a --starttime option in argv[2] supplies the run timestamp in argv[3] */
258 if (argc > 3 && strcmp(argv[2], "--starttime") == 0) {
259 planner_timestamp = stralloc(argv[3]);
265 * 1. Networking Setup
272 * 2. Read in Configuration Information
274 * All the Amanda configuration files are loaded before we begin.
277 g_fprintf(stderr,_("READING CONF INFO...\n"));
279 if(origq.head == NULL) {
280 error(_("empty disklist \"%s\""), conf_diskfile);
284 amfree(conf_diskfile);
286 conf_tapelist = config_dir_relative(getconf_str(CNF_TAPELIST));
287 if(read_tapelist(conf_tapelist)) {
288 error(_("could not load tapelist \"%s\""), conf_tapelist);
291 amfree(conf_tapelist);
293 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
294 if(open_infofile(conf_infofile)) {
295 error(_("could not open info db \"%s\""), conf_infofile);
298 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
299 log_add(L_WARNING, "problem copying infofile: %s", errstr);
302 amfree(conf_infofile);
304 conf_tapetype = getconf_str(CNF_TAPETYPE);
305 conf_maxdumpsize = getconf_int64(CNF_MAXDUMPSIZE);
306 conf_runtapes = getconf_int(CNF_RUNTAPES);
307 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
308 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
309 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
310 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
311 conf_reserve = getconf_int(CNF_RESERVE);
312 conf_autoflush = getconf_boolean(CNF_AUTOFLUSH);
313 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
/*
 * With usetimestamps off, a supplied timestamp is cut down to its first 8
 * characters; without a supplied one a datestamp or timestamp is generated.
 * NOTE(review): the write to index 8 assumes the --starttime value is at
 * least 8 characters long; a shorter argument would be written past its
 * end -- confirm whether the caller guarantees the length.
 */
316 if (planner_timestamp) {
317 if (conf_usetimestamps == 0) {
318 planner_timestamp[8] = '\0';
320 } else if(conf_usetimestamps == 0) {
321 planner_timestamp = get_datestamp_from_time(0);
324 planner_timestamp = get_timestamp_from_time(0);
326 log_add(L_START, _("date %s"), planner_timestamp);
327 g_printf("DATE %s\n", planner_timestamp);
329 g_fprintf(stderr, _("%s: timestamp %s\n"),
330 get_pname(), planner_timestamp);
332 errstr = match_disklist(&origq, argc-diskarg_offset,
333 argv+diskarg_offset);
335 g_fprintf(stderr,"%s",errstr);
336 exit_status = EXIT_FAILURE;
339 for (dp = origq.head; dp != NULL; dp = dp->next) {
341 qname = quote_string(dp->name);
342 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
350 error(_("no DLE to backup; %s"), errstr);
352 error(_("no DLE to backup"));
356 log_add(L_WARNING, "WARNING: %s", errstr);
360 /* some initializations */
/* runspercycle 0 means use dumpcycle; -1 means guess from the tapelist */
362 if(conf_runspercycle == 0) {
363 runs_per_cycle = conf_dumpcycle;
364 } else if(conf_runspercycle == -1 ) {
365 runs_per_cycle = guess_runs_from_tapelist();
367 runs_per_cycle = conf_runspercycle;
369 if (runs_per_cycle <= 0) {
374 * do some basic sanity checking
376 if(conf_tapecycle <= runs_per_cycle) {
377 log_add(L_WARNING, _("tapecycle (%d) <= runspercycle (%d)"),
378 conf_tapecycle, runs_per_cycle);
381 tape = lookup_tapetype(conf_tapetype);
/* a positive maxdumpsize replaces the tapetype length * runtapes budget */
382 if(conf_maxdumpsize > (gint64)0) {
383 tape_length = conf_maxdumpsize;
384 g_fprintf(stderr, "planner: tape_length is set from maxdumpsize (%jd KB)\n",
385 (intmax_t)conf_maxdumpsize);
388 tape_length = tapetype_get_length(tape) * (gint64)conf_runtapes;
389 g_fprintf(stderr, "planner: tape_length is set from tape length (%jd KB) * runtapes (%d) == %jd KB\n",
390 (intmax_t)tapetype_get_length(tape),
392 (intmax_t)tape_length);
394 tape_mark = (size_t)tapetype_get_filemark(tape);
395 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
396 tt_blocksize = tt_blocksize_kb * 1024;
398 g_fprintf(stderr, _("%s: time %s: startup took %s secs\n"),
400 walltime_str(curclock()),
401 walltime_str(timessub(curclock(), section_start)));
404 * 3. Send autoflush dumps left on the holding disks
406 * This should give us something to do while we generate the new
410 g_fprintf(stderr,_("\nSENDING FLUSHES...\n"));
414 GSList *holding_list, *holding_file;
415 char *qdisk, *qhname;
417 /* get *all* flushable files in holding */
418 holding_list = holding_get_files_for_flush(NULL);
419 for(holding_file=holding_list; holding_file != NULL;
420 holding_file = holding_file->next) {
421 holding_file_get_dumpfile((char *)holding_file->data, &file);
423 if (holding_file_size((char *)holding_file->data, 1) <= 0) {
424 log_add(L_INFO, "%s: removing file with no data.",
425 (char *)holding_file->data);
426 holding_file_unlink((char *)holding_file->data);
427 dumpfile_free_data(&file);
431 qdisk = quote_string(file.disk);
432 qhname = quote_string((char *)holding_file->data);
433 log_add(L_DISK, "%s %s", file.name, qdisk);
435 "FLUSH %s %s %s %d %s\n",
442 "FLUSH %s %s %s %d %s\n",
450 dumpfile_free_data(&file);
452 g_slist_free_full(holding_list);
455 g_fprintf(stderr, _("ENDFLUSH\n"));
456 g_fprintf(stdout, _("ENDFLUSH\n"));
460 * 4. Calculate Preliminary Dump Levels
462 * Before we can get estimates from the remote slave hosts, we make a
463 * first attempt at guessing what dump levels we will be dumping at
464 * based on the curinfo database.
467 g_fprintf(stderr,_("\nSETTING UP FOR ESTIMATES...\n"));
468 section_start = curclock();
470 startq.head = startq.tail = NULL;
471 while(!empty(origq)) {
472 disk_t *dp = dequeue_disk(&origq);
478 g_fprintf(stderr, _("%s: time %s: setting up estimates took %s secs\n"),
480 walltime_str(curclock()),
481 walltime_str(timessub(curclock(), section_start)));
485 * 5. Get Dump Size Estimates from Remote Client Hosts
487 * Each host is queried (in parallel) for dump size information on all
488 * of its disks, and the results gathered as they come in.
491 /* go out and get the dump estimates */
493 g_fprintf(stderr,_("\nGETTING ESTIMATES...\n"));
494 section_start = curclock();
496 estq.head = estq.tail = NULL;
497 pestq.head = pestq.tail = NULL;
498 waitq.head = waitq.tail = NULL;
499 failq.head = failq.tail = NULL;
503 g_fprintf(stderr, _("%s: time %s: getting estimates took %s secs\n"),
505 walltime_str(curclock()),
506 walltime_str(timessub(curclock(), section_start)));
509 * At this point, all disks with estimates are in estq, and
510 * all the disks on hosts that didn't respond to our inquiry
514 dump_queue("FAILED", failq, 15, stderr);
515 dump_queue("DONE", estq, 15, stderr);
518 exit_status = EXIT_FAILURE;
522 * 6. Analyze Dump Estimates
524 * Each disk's estimates are looked at to determine what level it
525 * should dump at, and to calculate the expected size and time taking
526 * historical dump rates and compression ratios into account. The
527 * total expected size is accumulated as well.
530 g_fprintf(stderr,_("\nANALYZING ESTIMATES...\n"));
531 section_start = curclock();
533 /* an empty tape still has a label and an endmark */
534 total_size = ((gint64)tt_blocksize_kb + (gint64)tape_mark) * (gint64)2;
538 schedq.head = schedq.tail = NULL;
539 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
540 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
543 * At this point, all the disks are on schedq sorted by priority.
544 * The total estimated size of the backups is in total_size.
550 g_fprintf(stderr, _("INITIAL SCHEDULE (size %lld):\n"),
551 (long long)total_size);
552 for(dp = schedq.head; dp != NULL; dp = dp->next) {
553 qname = quote_string(dp->name);
554 g_fprintf(stderr, _(" %s %s pri %d lev %d nsize %lld csize %lld\n"),
555 dp->host->hostname, qname, est(dp)->dump_priority,
556 est(dp)->dump_est->level,
557 (long long)est(dp)->dump_est->nsize,
558 (long long)est(dp)->dump_est->csize);
565 * 7. Delay Dumps if Schedule Too Big
567 * If the generated schedule is too big to fit on the tape, we need to
568 * delay some full dumps to make room. Incrementals will be done
569 * instead (except for new or forced disks).
571 * In extreme cases, delaying all the full dumps is not even enough.
572 * If so, some low-priority incrementals will be skipped completely
573 * until the dumps fit on the tape.
576 g_fprintf(stderr, _("\nDELAYING DUMPS IF NEEDED, total_size %lld, tape length %lld mark %zu\n"),
577 (long long)total_size,
578 (long long)tape_length,
581 initial_size = total_size;
585 /* XXX - why bother checking this? */
586 if(empty(schedq) && total_size < initial_size) {
587 error(_("cannot fit anything on tape, bailing out"));
593 * 8. Promote Dumps if Schedule Too Small
595 * Amanda attempts to balance the full dumps over the length of the
596 * dump cycle. If this night's full dumps are too small relative to
597 * the other nights, promote some high-priority full dumps that will be
598 * due for the next run, to full dumps for tonight, taking care not to
599 * overflow the tape size.
601 * This doesn't work too well for small sites. For these we scan ahead
602 * looking for nights that have an excessive number of dumps and promote
605 * Amanda never delays full dumps just for the sake of balancing the
606 * schedule, so it can take a full cycle to balance the schedule after
611 _("\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n"),
612 total_lev0, balanced_size);
614 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
/* keep promoting while the level 0 total is short of balanced_size by more than the threshold and the last attempt moved a disk */
616 while((balanced_size - total_lev0) > balance_threshold && moved_one)
617 moved_one = promote_highest_priority_incremental();
619 moved_one = promote_hills();
621 g_fprintf(stderr, _("%s: time %s: analysis took %s secs\n"),
623 walltime_str(curclock()),
624 walltime_str(timessub(curclock(), section_start)));
630 * The schedule goes to stdout, presumably to driver. A copy is written
631 * on stderr for the debug file.
634 g_fprintf(stderr,_("\nGENERATING SCHEDULE:\n--------\n"));
636 exit_status = EXIT_FAILURE;
637 g_fprintf(stderr, _("--> Generated empty schedule! <--\n"));
639 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
641 g_fprintf(stderr, _("--------\n"));
644 log_add(L_FINISH, _("date %s time %s"), planner_timestamp, walltime_str(curclock()));
645 log_add(L_INFO, "pid-done %ld", (long)getpid());
648 amfree(planner_timestamp);
649 amfree(our_feature_string);
650 am_release_feature_set(our_features);
661 * ========================================================================
662 * SETUP FOR ESTIMATES
666 static void askfor(est_t *, int, int, info_t *);
667 static int last_level(info_t *info); /* subroutines */
668 static one_est_t *est_for_level(disk_t *dp, int level);
669 static void est_csize(disk_t *dp, one_est_t *one_est);
670 static gint64 est_tape_size(disk_t *dp, int level);
671 static int next_level0(disk_t *dp, info_t *info);
672 static int runs_at(info_t *info, int lev);
673 static gint64 bump_thresh(int level, gint64 size_level_0, int bumppercent, gint64 bumpsize, double bumpmult);
674 static int when_overwrite(char *label);
/*
 * askfor: set up estimate slot seq of ep for dump level lev.
 *
 * seq must be in 0..MAX_LEVELS-1 and lev in -1..DUMP_LEVELS-1; anything else
 * is reported through error().  Two initialisations are visible: one marks
 * the slot unused (level -1, null dumpdate) and one records lev together
 * with a copy of get_dumpdate(info, lev).  Both start nsize/csize at -3 and
 * guessed at 0.
 * NOTE(review): the condition choosing between the two is not visible in
 * this listing -- presumably lev == -1 selects the first; confirm.
 */
677 est_t *ep, /* estimate data block */
678 int seq, /* sequence number of request */
679 int lev, /* dump level being requested */
680 info_t *info) /* info block for disk */
682 if(seq < 0 || seq >= MAX_LEVELS) {
683 error(_("error [planner askfor: seq out of range 0..%d: %d]"),
687 if(lev < -1 || lev >= DUMP_LEVELS) {
688 error(_("error [planner askfor: lev out of range -1..%d: %d]"),
694 ep->estimate[seq].level = -1;
695 ep->estimate[seq].dumpdate = (char *)0;
696 ep->estimate[seq].nsize = (gint64)-3;
697 ep->estimate[seq].csize = (gint64)-3;
698 ep->estimate[seq].guessed = 0;
702 ep->estimate[seq].level = lev;
704 ep->estimate[seq].dumpdate = stralloc(get_dumpdate(info,lev));
706 ep->estimate[seq].nsize = (gint64)-3;
707 ep->estimate[seq].csize = (gint64)-3;
708 ep->estimate[seq].guessed = 0;
/*
 * Body of the per-disk estimate setup (presumably setup_estimate(), declared
 * above; its signature line is not visible in this listing).
 *
 * For the disk dp it loads the info record, rejects directtcp combinations
 * that cannot work, allocates the est_t and attaches it to dp->up, derives
 * last_level / next_level0 and the dump priority, handles the skip-full,
 * incronly-without-full and skip-incr cases (these fill every slot with -1
 * and are marked "don't enqueue the disk"), chooses up to MAX_LEVELS levels
 * to estimate via askfor(), and finally enqueues dp on startq.
 */
723 assert(dp && dp->host);
725 qname = quote_string(dp->name);
726 g_fprintf(stderr, _("%s: time %s: setting up estimates for %s:%s\n"),
727 get_pname(), walltime_str(curclock()),
728 dp->host->hostname, qname);
730 /* get current information about disk */
732 if(get_info(dp->host->hostname, dp->name, &info)) {
733 /* no record for this disk, make a note of it */
734 log_add(L_INFO, _("Adding new disk %s:%s."), dp->host->hostname, qname);
737 if (dp->data_path == DATA_PATH_DIRECTTCP) {
738 if (dp->compress != COMP_NONE) {
739 log_add(L_FAIL, _("%s %s %s 0 [Can't compress directtcp data-path]"),
740 dp->host->hostname, qname, planner_timestamp);
741 g_fprintf(stderr,_("%s:%s lev 0 skipped can't compress directtcp data-path\n"),
742 dp->host->hostname, qname);
746 if (dp->encrypt != ENCRYPT_NONE) {
747 log_add(L_FAIL, _("%s %s %s 0 [Can't encrypt directtcp data-path]"),
748 dp->host->hostname, qname, planner_timestamp);
749 g_fprintf(stderr,_("%s:%s lev 0 skipped can't encrypt directtcp data-path\n"),
750 dp->host->hostname, qname);
754 if (dp->to_holdingdisk == HOLD_REQUIRED) {
755 log_add(L_FAIL, _("%s %s %s 0 [Holding disk can't be use for directtcp data-path]"),
756 dp->host->hostname, qname, planner_timestamp);
757 g_fprintf(stderr,_("%s:%s lev 0 skipped Holding disk can't be use for directtcp data-path\n"),
758 dp->host->hostname, qname);
761 } else if (dp->to_holdingdisk == HOLD_AUTO) {
762 log_add(L_INFO, _("Disabling holding disk for %s:%s."),
763 dp->host->hostname, qname);
764 g_fprintf(stderr,_("%s:%s Disabling holding disk\n"),
765 dp->host->hostname, qname);
766 dp->to_holdingdisk = HOLD_NEVER;
770 /* setup working data struct for disk */
772 ep = alloc(SIZEOF(est_t));
773 dp->up = (void *) ep;
774 ep->state = DISK_READY;
775 ep->dump_priority = dp->priority;
779 ep->degr_mesg = NULL;
780 ep->dump_est = &default_one_est;
781 ep->degr_est = &default_one_est;
783 /* calculated fields */
785 if (ISSET(info.command, FORCE_FULL)) {
786 /* force a level 0, kind of like a new disk */
787 if(dp->strategy == DS_NOFULL) {
789 * XXX - Not sure what it means to force a no-full disk. The
790 * purpose of no-full is to just dump changes relative to a
791 * stable base, for example root partitions that vary only
792 * slightly from a site-wide prototype. Only the variations
795 * If we allow a level 0 onto the Amanda cycle, then we are
796 * hosed when that tape gets re-used next. Disallow this for
800 _("Cannot force full dump of %s:%s with no-full option."),
801 dp->host->hostname, qname);
803 /* clear force command */
804 CLR(info.command, FORCE_FULL);
805 if(put_info(dp->host->hostname, dp->name, &info)) {
806 error(_("could not put info record for %s:%s: %s"),
807 dp->host->hostname, qname, strerror(errno));
810 ep->last_level = last_level(&info);
811 ep->next_level0 = next_level0(dp, &info);
814 ep->degr_mesg = _("Skipping: force-full disk can't be dumped in degraded mode");
816 ep->next_level0 = -conf_dumpcycle;
817 log_add(L_INFO, _("Forcing full dump of %s:%s as directed."),
818 dp->host->hostname, qname);
821 else if(dp->strategy == DS_NOFULL) {
822 /* force estimate of level 1 */
824 ep->next_level0 = next_level0(dp, &info);
827 ep->last_level = last_level(&info);
828 ep->next_level0 = next_level0(dp, &info);
831 /* adjust priority levels */
833 /* warn if dump will be overwritten */
834 if (ep->last_level > -1 && strlen(info.inf[0].label) > 0) {
835 overwrite_runs = when_overwrite(info.inf[0].label);
836 if(overwrite_runs == 0) {
837 log_add(L_WARNING, _("Last full dump of %s:%s "
838 "on tape %s overwritten on this run."),
839 dp->host->hostname, qname, info.inf[0].label);
840 } else if(overwrite_runs <= RUNS_REDZONE) {
842 plural(_("Last full dump of %s:%s on tape %s overwritten in %d run."),
843 _("Last full dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
844 dp->host->hostname, qname, info.inf[0].label,
849 /* warn if last level 1 will be overwritten */
850 if (ep->last_level > 1 && strlen(info.inf[1].label) > 0) {
851 overwrite_runs = when_overwrite(info.inf[1].label);
852 if(overwrite_runs == 0) {
853 log_add(L_WARNING, _("Last level 1 dump of %s:%s "
854 "on tape %s overwritten on this run, resetting to level 1"),
855 dp->host->hostname, qname, info.inf[1].label);
857 } else if(overwrite_runs <= RUNS_REDZONE) {
859 plural(_("Last level 1 dump of %s:%s on tape %s overwritten in %d run."),
860 _("Last level 1 dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
861 dp->host->hostname, qname, info.inf[1].label,
866 if(ep->next_level0 < 0) {
867 g_fprintf(stderr,plural(_("%s:%s overdue %d day for level 0\n"),
868 _("%s:%s overdue %d days for level 0\n"),
870 dp->host->hostname, qname, (-ep->next_level0));
871 ep->dump_priority -= ep->next_level0; /* next_level0 is negative here, so this raises the priority by the days overdue */
873 else if (ISSET(info.command, FORCE_FULL))
874 ep->dump_priority += 1;
875 /* else XXX bump up the priority of incrementals that failed last night */
877 /* handle external level 0 dumps */
879 if(dp->skip_full && dp->strategy != DS_NOINC) {
880 if(ep->next_level0 <= 0) {
881 /* update the date field */
882 info.inf[0].date = today;
883 CLR(info.command, FORCE_FULL);
884 ep->next_level0 += conf_dumpcycle;
886 if(put_info(dp->host->hostname, dp->name, &info)) {
887 error(_("could not put info record for %s:%s: %s"),
888 dp->host->hostname, qname, strerror(errno));
891 log_add(L_INFO, _("Skipping full dump of %s:%s today."),
892 dp->host->hostname, qname);
893 g_fprintf(stderr,_("%s:%s lev 0 skipped due to skip-full flag\n"),
894 dp->host->hostname, qname);
895 /* don't enqueue the disk */
896 askfor(ep, 0, -1, &info);
897 askfor(ep, 1, -1, &info);
898 askfor(ep, 2, -1, &info);
899 g_fprintf(stderr, _("%s: SKIPPED %s %s 0 [skip-full]\n"),
900 get_pname(), dp->host->hostname, qname);
901 log_add(L_SUCCESS, _("%s %s %s 0 [skipped: skip-full]"),
902 dp->host->hostname, qname, planner_timestamp);
907 if(ep->last_level == -1) {
908 /* probably a new disk, but skip-full means no full! */
912 if(ep->next_level0 == 1) {
913 log_add(L_WARNING, _("Skipping full dump of %s:%s tomorrow."),
914 dp->host->hostname, qname);
918 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info.command, FORCE_FULL)) {
919 /* don't enqueue the disk */
920 askfor(ep, 0, -1, &info);
921 askfor(ep, 1, -1, &info);
922 askfor(ep, 2, -1, &info);
923 log_add(L_FAIL, _("%s %s 19000101 1 [Skipping incronly because no full dump were done]"),
924 dp->host->hostname, qname);
925 g_fprintf(stderr,_("%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n"),
926 dp->host->hostname, qname);
931 /* handle "skip-incr" type archives */
933 if(dp->skip_incr && ep->next_level0 > 0) {
934 g_fprintf(stderr,_("%s:%s lev 1 skipped due to skip-incr flag\n"),
935 dp->host->hostname, qname);
936 /* don't enqueue the disk */
937 askfor(ep, 0, -1, &info);
938 askfor(ep, 1, -1, &info);
939 askfor(ep, 2, -1, &info);
941 g_fprintf(stderr, _("%s: SKIPPED %s %s 1 [skip-incr]\n"),
942 get_pname(), dp->host->hostname, qname);
944 log_add(L_SUCCESS, _("%s %s %s 1 [skipped: skip-incr]"),
945 dp->host->hostname, qname, planner_timestamp);
950 if( ep->last_level == -1 && ep->next_level0 > 0 &&
951 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
952 conf_reserve == 100) {
953 log_add(L_WARNING, _("%s:%s mismatch: no tapelist record, "
954 "but curinfo next_level0: %d."),
955 dp->host->hostname, qname, ep->next_level0);
959 if(ep->last_level == 0) ep->level_days = 0;
960 else ep->level_days = runs_at(&info, ep->last_level);
961 ep->last_lev0size = info.inf[0].csize;
963 ep->fullrate = perf_average(info.full.rate, 0.0);
964 ep->incrrate = perf_average(info.incr.rate, 0.0);
966 ep->fullcomp = perf_average(info.full.comp, dp->comprate[0]);
967 ep->incrcomp = perf_average(info.incr.comp, dp->comprate[1]);
969 /* determine which estimates to get */
973 if (dp->strategy == DS_NOINC ||
975 (!ISSET(info.command, FORCE_BUMP) ||
977 ep->last_level == -1))) {
978 if(info.command & FORCE_BUMP && ep->last_level == -1) {
980 _("Remove force-bump command of %s:%s because it's a new disk."),
981 dp->host->hostname, qname);
983 switch (dp->strategy) {
986 askfor(ep, i++, 0, &info);
988 log_add(L_INFO, _("Ignoring skip_full for %s:%s "
989 "because the strategy is NOINC."),
990 dp->host->hostname, qname);
992 if(info.command & FORCE_BUMP) {
994 _("Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC."),
995 dp->host->hostname, qname);
1004 if (ISSET(info.command, FORCE_FULL))
1010 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
1011 if(ep->last_level == -1) { /* a new disk */
1012 if (ep->degr_mesg == NULL)
1013 ep->degr_mesg = _("Skipping: new disk can't be dumped in degraded mode");
1014 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
1015 askfor(ep, i++, 1, &info);
1017 assert(!dp->skip_full); /* should be handled above */
1019 } else { /* not new, pick normally */
1022 curr_level = ep->last_level;
1024 if (ISSET(info.command, FORCE_NO_BUMP)) {
1025 if(curr_level > 0) { /* level 0 already asked for */
1026 askfor(ep, i++, curr_level, &info);
1028 log_add(L_INFO,_("Preventing bump of %s:%s as directed."),
1029 dp->host->hostname, qname);
1030 ep->degr_mesg = _("Skipping: force-no-bump disk can't be dumped in degraded mode");
1031 } else if (ISSET(info.command, FORCE_BUMP)
1032 && curr_level + 1 < DUMP_LEVELS) {
1033 askfor(ep, i++, curr_level+1, &info);
1034 log_add(L_INFO,_("Bumping of %s:%s at level %d as directed."),
1035 dp->host->hostname, qname, curr_level+1);
1036 ep->degr_mesg = _("Skipping: force-bump disk can't be dumped in degraded mode");
1037 } else if (curr_level == 0) {
1038 askfor(ep, i++, 1, &info);
1040 askfor(ep, i++, curr_level, &info);
1042 * If last time we dumped less than the threshold, then this
1043 * time we will too, OR the extra size will be charged to both
1044 * cur_level and cur_level + 1, so we will never bump. Also,
1045 * if we haven't been at this level 2 days, or the dump failed
1046 * last night, we can't bump.
1048 if((info.inf[curr_level].size == (gint64)0 || /* no data, try it anyway */
1049 (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
1050 && ep->level_days >= dp->bumpdays))
1051 && curr_level + 1 < DUMP_LEVELS) {
1052 askfor(ep, i++, curr_level+1, &info);
1058 while(i < MAX_LEVELS) /* mark end of estimates */
1059 askfor(ep, i++, -1, &info);
1063 g_fprintf(stderr, _("setup_estimate: %s:%s: command %u, options: %s "
1064 "last_level %d next_level0 %d level_days %d getting estimates "
1065 "%d (%lld) %d (%lld) %d (%lld)\n"),
1066 dp->host->hostname, qname, info.command,
1067 dp->strategy == DS_NOFULL ? "no-full" :
1068 dp->strategy == DS_INCRONLY ? "incr-only" :
1069 dp->skip_full ? "skip-full" :
1070 dp->skip_incr ? "skip-incr" : "none",
1071 ep->last_level, ep->next_level0, ep->level_days,
1072 ep->estimate[0].level, (long long)ep->estimate[0].nsize,
1073 ep->estimate[1].level, (long long)ep->estimate[1].nsize,
1074 ep->estimate[2].level, (long long)ep->estimate[2].nsize);
1076 assert(ep->estimate[0].level != -1);
1077 enqueue_disk(&startq, dp);
/*
 * when_overwrite: estimate in how many runs the tape named label will be
 * overwritten (callers treat 0 as "on this run").
 *
 * An unknown label yields 1 so that the caller's warning still fires.
 * Otherwise the result is the distance from the tape's position to the
 * larger of conf_tapecycle and lookup_nb_tape(), divided by runtapes
 * (a conf_runtapes of 0 is treated as 1).
 * NOTE(review): the value returned for a tape with reuse == 0 is not
 * visible in this listing.
 */
1081 static int when_overwrite(
1087 runtapes = conf_runtapes;
1088 if(runtapes == 0) runtapes = 1;
1090 if((tp = lookup_tapelabel(label)) == NULL)
1091 return 1; /* "shouldn't happen", but trigger warning message */
1092 else if(tp->reuse == 0)
1094 else if(lookup_nb_tape() > conf_tapecycle)
1095 return (lookup_nb_tape() - tp->position) / runtapes;
1097 return (conf_tapecycle - tp->position) / runtapes;
1100 /*
 * Return the estimate slot of disk dp that was requested for the given
 * level.  The address of default_one_est is returned when level is outside
 * 0..DUMP_LEVELS-1 or when none of the MAX_LEVELS slots carries that level.
 * A matching slot whose csize is still <= -1 has it filled in by
 * est_csize() before being returned.
 */
1108 if (level < 0 || level >= DUMP_LEVELS)
1109 return &default_one_est;
1111 for (i = 0; i < MAX_LEVELS; i++) {
1112 if (level == est(dp)->estimate[i].level) {
1113 if (est(dp)->estimate[i].csize <= -1) {
1114 est_csize(dp, &est(dp)->estimate[i]);
1116 return &est(dp)->estimate[i];
1119 return &default_one_est;
1122 /*
 * Compute the estimated on-tape size of a particular dump and store it in
 * one_est->csize.  Without compression csize is simply nsize.  Otherwise
 * nsize is scaled by the disk's historical ratio (fullcomp for level 0,
 * incrcomp for any other level), with the ratio capped at 1.1.
 * NOTE(review): the handling of a non-positive scaled size is not visible
 * in this listing; the surrounding text suggests it is forced back above
 * zero -- confirm.
 */
1128 gint64 size = one_est->nsize;
1131 if (dp->compress == COMP_NONE) {
1132 one_est->csize = one_est->nsize;
1136 if (one_est->level == 0) ratio = est(dp)->fullcomp;
1137 else ratio = est(dp)->incrcomp;
1140 * make sure over-inflated compression ratios don't throw off the
1141 * estimates, this is mostly for when you have a small dump getting
1142 * compressed which takes up alot more disk/tape space relatively due
1143 * to the overhead of the compression. This is specifically for
1144 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1145 * (RUG@USM.Uni-Muenchen.DE)
1148 if (ratio > 1.1) ratio = 1.1;
1150 size = (gint64)((double)size * ratio);
1153 * Ratio can be very small in some error situations, so make sure
1154 * size goes back greater than zero. It may not be right, but
1155 * indicates we did get an estimate.
1157 if (size <= (gint64)0) {
1161 one_est->csize = size;
/*
 * est_tape_size: return the csize of dp's estimate for the given level,
 * computing it through est_csize() first when it is still <= -1.
 * NOTE(review): est_for_level() can hand back the shared default_one_est
 * (whose csize is initialised to -1), in which case est_csize() would be
 * applied to that shared placeholder and modify it -- confirm whether
 * callers only pass levels that have a slot.
 */
1164 static gint64 est_tape_size(
1168 one_est_t *dump_est;
1170 dump_est = est_for_level(dp, level);
1171 if (dump_est->csize <= -1)
1172 est_csize(dp, dump_est);
1173 return dump_est->csize;
1177 /* what was the level of the last successful dump to tape? */
/*
 * Returns info->last_level directly when it is already set (!= -1).
 * Otherwise falls back to scanning levels 0..8 of the info record, skipping
 * entries dated before the level 0 entry: with conf_reserve < 100 the scan
 * tracks the newest date, else it tracks the smallest position among the
 * tapes found by label.
 * NOTE(review): the assignment info->last_level = i appears to sit after
 * the loop, where i would be the loop bound (9) rather than the level that
 * was found -- confirm against the full source; if so, later calls would
 * return 9 from the cached field.
 */
1178 static int last_level(
1181 int min_pos, min_level, i;
1182 time_t lev0_date, last_date;
1185 if(info->last_level != -1)
1186 return info->last_level;
1188 /* to keep compatibility with old infofile */
1189 min_pos = 1000000000;
1193 for(i = 0; i < 9; i++) {
1194 if(conf_reserve < 100) {
1195 if(i == 0) lev0_date = info->inf[0].date;
1196 else if(info->inf[i].date < lev0_date) continue;
1197 if(info->inf[i].date > last_date) {
1198 last_date = info->inf[i].date;
1203 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1204 /* cull any entries from previous cycles */
1205 if(i == 0) lev0_date = info->inf[0].date;
1206 else if(info->inf[i].date < lev0_date) continue;
1208 if(tp->position < min_pos) {
1209 min_pos = tp->position;
1214 info->last_level = i;
1218 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc*/
/*
 * The no-full and incr-only strategies always report 1.  A negative level 0
 * date marks a new disk, reported as overdue by the number of days between
 * EPOCH and today.  Otherwise the result is dp->dumpcycle minus the days
 * since the last level 0; callers treat a negative value as days overdue.
 * NOTE(review): the value returned for DS_NOINC is not visible in this
 * listing.
 */
1224 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1225 return 1; /* fake it */
1226 else if (dp->strategy == DS_NOINC)
1228 else if(info->inf[0].date < (time_t)0)
1229 return -days_diff(EPOCH, today); /* new disk */
1231 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1234 /* how many runs at current level? */
/*
 * Returns 0 when lev is not the last dumped level and 1 when it is level 0.
 * Otherwise info->consecutive_runs is used when set (!= -1).  For old info
 * files the count is derived from the tape positions of the lev and lev-1
 * dumps divided by conf_runtapes (treated as 1 when 0), and the result is
 * cached in info->consecutive_runs; 0 is returned when either tape label
 * cannot be looked up.
 */
1239 tape_t *cur_tape, *old_tape;
1242 last = last_level(info);
1243 if(lev != last) return 0;
1244 if(lev == 0) return 1;
1246 if(info->consecutive_runs != -1)
1247 return info->consecutive_runs;
1249 /* to keep compatibility with old infofile */
1250 cur_tape = lookup_tapelabel(info->inf[lev].label);
1251 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1252 if(cur_tape == NULL || old_tape == NULL) return 0;
1254 if(conf_runtapes == 0)
1255 nb_runs = (old_tape->position - cur_tape->position) / 1;
1257 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1258 info->consecutive_runs = nb_runs;
1264 static gint64 bump_thresh(
1266 gint64 size_level_0,
1273 if ((bumppercent != 0) && (size_level_0 > (gint64)1024)) {
1274 bump = ((double)size_level_0 * (double)bumppercent) / 100.0;
1277 bump = (double)bumpsize;
1279 while(--level) bump = bump * bumpmult;
1281 return (gint64)bump;
1287 * ========================================================================
1288 * GET REMOTE DUMP SIZE ESTIMATES
1292 static void getsize(am_host_t *hostp);
1293 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
1294 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
1297 static void get_estimates(void)
1301 int something_started;
1303 something_started = 1;
1304 while(something_started) {
1305 something_started = 0;
1306 for(dp = startq.head; dp != NULL; dp = dp->next) {
1308 if(hostp->up == HOST_READY) {
1309 something_started = 1;
1310 for(dp1 = hostp->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1312 run_server_scripts(EXECUTE_ON_PRE_HOST_ESTIMATE,
1313 get_config_name(), dp1,
1314 est(dp1)->estimate[0].level);
1316 for(dp1 = hostp->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1318 run_server_scripts(EXECUTE_ON_PRE_DLE_ESTIMATE,
1319 get_config_name(), dp1,
1320 est(dp1)->estimate[0].level);
1325 * dp is no longer on startq, so dp->next is not valid
1326 * and we have to start all over.
1334 while(!empty(waitq)) {
1335 disk_t *dp = dequeue_disk(&waitq);
1336 est(dp)->errstr = _("hmm, disk was stranded on waitq");
1337 enqueue_disk(&failq, dp);
1340 while(!empty(pestq)) {
1341 disk_t *dp = dequeue_disk(&pestq);
1342 char * qname = quote_string(dp->name);
1345 for (i=0; i < MAX_LEVELS; i++) {
1346 if (est(dp)->estimate[i].level != -1 &&
1347 est(dp)->estimate[i].nsize < (gint64)0) {
1348 if (est(dp)->estimate[i].nsize == (gint64)-3) {
1350 _("disk %s:%s, estimate of level %d timed out."),
1351 dp->host->hostname, qname, est(dp)->estimate[i].level);
1353 est(dp)->estimate[i].level = -1;
1357 if ((est(dp)->estimate[0].level != -1 &&
1358 est(dp)->estimate[0].nsize > (gint64)0) ||
1359 (est(dp)->estimate[1].level != -1 &&
1360 est(dp)->estimate[1].nsize > (gint64)0) ||
1361 (est(dp)->estimate[2].level != -1 &&
1362 est(dp)->estimate[2].nsize > (gint64)0)) {
1363 enqueue_disk(&estq, dp);
1366 est(dp)->errstr = vstralloc("disk ", qname,
1367 _(", all estimate timed out"), NULL);
1368 enqueue_disk(&failq, dp);
1374 static void getsize(
1377 char number[NUM_STR_SIZE], *req;
1380 time_t estimates, timeout;
1382 const security_driver_t *secdrv;
1384 char * qname, *b64disk = NULL;
1385 char * qdevice, *b64device = NULL;
1386 estimate_t estimate;
1389 assert(hostp->disks != NULL);
1391 if(hostp->up != HOST_READY) {
1396 * The first time through here we send a "noop" request. This will
1397 * return the feature list from the client if it supports that.
1398 * If it does not, handle_result() will set the feature list to an
1399 * empty structure. In either case, we do the disks on the second
1400 * (and subsequent) pass(es).
1402 if(hostp->features != NULL) { /* sendsize service */
1406 int has_features = am_has_feature(hostp->features,
1407 fe_req_options_features);
1408 int has_hostname = am_has_feature(hostp->features,
1409 fe_req_options_hostname);
1410 int has_maxdumps = am_has_feature(hostp->features,
1411 fe_req_options_maxdumps);
1412 int has_config = am_has_feature(hostp->features,
1413 fe_req_options_config);
1415 g_snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
1416 req = vstralloc("SERVICE ", "sendsize", "\n",
1418 has_features ? "features=" : "",
1419 has_features ? our_feature_string : "",
1420 has_features ? ";" : "",
1421 has_maxdumps ? "maxdumps=" : "",
1422 has_maxdumps ? number : "",
1423 has_maxdumps ? ";" : "",
1424 has_hostname ? "hostname=" : "",
1425 has_hostname ? hostp->hostname : "",
1426 has_hostname ? ";" : "",
1427 has_config ? "config=" : "",
1428 has_config ? get_config_name() : "",
1429 has_config ? ";" : "",
1432 req_len = strlen(req);
1433 req_len += 128; /* room for SECURITY ... */
1435 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1439 GPtrArray *errarray;
1441 if(dp->todo == 0) continue;
1443 if(est(dp)->state != DISK_READY) continue;
1445 est(dp)->got_estimate = 0;
1446 if (est(dp)->estimate[0].level == -1) {
1447 est(dp)->state = DISK_DONE;
1451 qname = quote_string(dp->name);
1453 errarray = validate_optionstr(dp);
1454 if (errarray->len > 0) {
1456 for (i=0; i < errarray->len; i++) {
1457 log_add(L_FAIL, _("%s %s %s 0 [%s]"),
1458 dp->host->hostname, qname,
1460 (char *)g_ptr_array_index(errarray, i));
1463 est(dp)->state = DISK_DONE;
1467 b64disk = amxml_format_tag("disk", dp->name);
1468 qdevice = quote_string(dp->device);
1469 estimate = (estimate_t)GPOINTER_TO_INT(dp->estimatelist->data);
1471 b64device = amxml_format_tag("diskdevice", dp->device);
1473 estimate = ES_CLIENT;
1474 for (el = dp->estimatelist; el != NULL; el = el->next) {
1475 estimate = (estimate_t)GPOINTER_TO_INT(el->data);
1476 if (estimate == ES_SERVER)
1479 if (estimate == ES_SERVER) {
1482 get_info(dp->host->hostname, dp->name, &info);
1483 for(i = 0; i < MAX_LEVELS; i++) {
1484 int lev = est(dp)->estimate[i].level;
1486 if(lev == -1) break;
1487 server_estimate(dp, i, &info, lev);
1489 g_fprintf(stderr,_("%s time %s: got result for host %s disk %s:"),
1490 get_pname(), walltime_str(curclock()),
1491 dp->host->hostname, qname);
1492 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1493 est(dp)->estimate[0].level,
1494 (long long)est(dp)->estimate[0].nsize,
1495 est(dp)->estimate[1].level,
1496 (long long)est(dp)->estimate[1].nsize,
1497 est(dp)->estimate[2].level,
1498 (long long)est(dp)->estimate[2].nsize);
1499 if (!am_has_feature(hostp->features, fe_xml_estimate)) {
1500 est(dp)->state = DISK_DONE;
1501 remove_disk(&startq, dp);
1502 enqueue_disk(&estq, dp);
1506 estimate = ES_SERVER;
1507 for (el = dp->estimatelist; el != NULL; el = el->next) {
1508 estimate = (estimate_t)GPOINTER_TO_INT(el->data);
1509 if (estimate == ES_CLIENT || estimate == ES_CALCSIZE)
1512 if (estimate == ES_CLIENT ||
1513 estimate == ES_CALCSIZE ||
1514 (am_has_feature(hostp->features, fe_req_xml) &&
1515 am_has_feature(hostp->features, fe_xml_estimate))) {
1519 if (am_has_feature(hostp->features, fe_req_xml)) {
1520 char *levelstr = NULL;
1521 char *spindlestr = NULL;
1522 char level[NUM_STR_SIZE];
1523 char spindle[NUM_STR_SIZE];
1528 get_info(dp->host->hostname, dp->name, &info);
1529 for(i = 0; i < MAX_LEVELS; i++) {
1531 int lev = est(dp)->estimate[i].level;
1532 if (lev == -1) break;
1533 g_snprintf(level, SIZEOF(level), "%d", lev);
1534 if (am_has_feature(hostp->features, fe_xml_level_server) &&
1535 server_can_do_estimate(dp, &info, lev)) {
1536 server = "<server>YES</server>";
1540 vstrextend(&levelstr, " <level>",
1542 "</level>\n", NULL);
1544 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1545 spindlestr = vstralloc(" <spindle>",
1547 "</spindle>\n", NULL);
1548 o = xml_optionstr(dp, 0);
1550 if (strcmp(dp->program,"DUMP") == 0 ||
1551 strcmp(dp->program,"GNUTAR") == 0) {
1552 l = vstralloc("<dle>\n",
1555 "</program>\n", NULL);
1557 l = vstralloc("<dle>\n",
1558 " <program>APPLICATION</program>\n",
1560 if (dp->application) {
1561 application_t *application;
1564 application = lookup_application(dp->application);
1565 g_assert(application != NULL);
1566 xml_app = xml_application(dp, application,
1568 vstrextend(&l, xml_app, NULL);
1573 es = xml_estimate(dp->estimatelist, hostp->features);
1574 vstrextend(&l, es, "\n", NULL);
1576 vstrextend(&l, " ", b64disk, "\n", NULL);
1578 vstrextend(&l, " ", b64device, "\n", NULL);
1579 vstrextend(&l, levelstr, spindlestr, o, "</dle>\n", NULL);
1583 } else if (strcmp(dp->program,"DUMP") != 0 &&
1584 strcmp(dp->program,"GNUTAR") != 0) {
1585 est(dp)->errstr = newvstrallocf(est(dp)->errstr,
1586 _("does not support application-api"));
1588 for(i = 0; i < MAX_LEVELS; i++) {
1590 char *exclude1 = "";
1591 char *exclude2 = "";
1592 char *excludefree = NULL;
1593 char *include1 = "";
1594 char *include2 = "";
1595 char *includefree = NULL;
1596 char spindle[NUM_STR_SIZE];
1597 char level[NUM_STR_SIZE];
1598 int lev = est(dp)->estimate[i].level;
1600 if(lev == -1) break;
1602 g_snprintf(level, SIZEOF(level), "%d", lev);
1603 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1604 if (am_has_feature(hostp->features,
1605 fe_sendsize_req_options)){
1606 exclude1 = " OPTIONS |";
1607 exclude2 = optionstr(dp);
1608 if ( exclude2 == NULL ) {
1609 error(_("problem with option string, check the dumptype definition.\n"));
1611 excludefree = exclude2;
1614 if (dp->exclude_file &&
1615 dp->exclude_file->nb_element == 1) {
1616 exclude1 = " exclude-file=";
1617 exclude2 = quote_string(
1618 dp->exclude_file->first->name);
1619 excludefree = exclude2;
1621 else if (dp->exclude_list &&
1622 dp->exclude_list->nb_element == 1) {
1623 exclude1 = " exclude-list=";
1624 exclude2 = quote_string(
1625 dp->exclude_list->first->name);
1626 excludefree = exclude2;
1628 if (dp->include_file &&
1629 dp->include_file->nb_element == 1) {
1630 include1 = " include-file=";
1631 include2 = quote_string(
1632 dp->include_file->first->name);
1633 includefree = include2;
1635 else if (dp->include_list &&
1636 dp->include_list->nb_element == 1) {
1637 include1 = " include-list=";
1638 include2 = quote_string(
1639 dp->include_list->first->name);
1640 includefree = include2;
1644 if (estimate == ES_CALCSIZE &&
1645 !am_has_feature(hostp->features,
1646 fe_calcsize_estimate)) {
1648 _("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1649 hostp->hostname, qname);
1650 estimate = ES_CLIENT;
1652 if(estimate == ES_CLIENT)
1655 calcsize = "CALCSIZE ";
1657 l = vstralloc(calcsize,
1660 " ", dp->device ? qdevice : "",
1662 " ", est(dp)->estimate[i].dumpdate,
1664 " ", exclude1, exclude2,
1665 ((includefree != NULL) ? " " : ""),
1672 amfree(includefree);
1673 amfree(excludefree);
1681 if (est(dp)->state == DISK_DONE) {
1682 remove_disk(&estq, dp);
1683 est(dp)->state = DISK_PARTIALY_DONE;
1684 enqueue_disk(&pestq, dp);
1686 remove_disk(&startq, dp);
1687 est(dp)->state = DISK_ACTIVE;
1689 } else if (est(dp)->state != DISK_DONE) {
1690 remove_disk(&startq, dp);
1691 est(dp)->state = DISK_DONE;
1692 if (est(dp)->errstr == NULL) {
1693 est(dp)->errstr = vstrallocf(
1694 _("Can't request estimate"));
1696 enqueue_disk(&failq, dp);
1703 if(estimates == 0) {
1705 hostp->up = HOST_DONE;
1709 if (conf_etimeout < 0) {
1710 timeout = - conf_etimeout;
1712 timeout = estimates * conf_etimeout;
1714 } else { /* noop service */
1715 req = vstralloc("SERVICE ", "noop", "\n",
1717 "features=", our_feature_string, ";",
1721 * We use ctimeout for the "noop" request because it should be
1722 * very fast and etimeout has other side effects.
1724 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
1727 secdrv = security_getdriver(hostp->disks->auth);
1728 if (secdrv == NULL) {
1729 hostp->up = HOST_DONE;
1731 _("Could not find security driver '%s' for host '%s'"),
1732 hostp->disks->auth, hostp->hostname);
1736 hostp->up = HOST_ACTIVE;
1738 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1742 if(est(dp)->state == DISK_ACTIVE) {
1743 est(dp)->errstr = NULL;
1744 enqueue_disk(&waitq, dp);
1748 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1749 req, timeout, handle_result, hostp);
1754 static disk_t *lookup_hostdisk(
1755 /*@keep@*/ am_host_t *hp,
1760 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1761 if(strcmp(str, dp->name) == 0) return dp;
1767 static void handle_result(
1770 security_handle_t *sech)
1776 char *msg, msg_undo;
1777 char *remoterr, *errbuf = NULL;
1788 hostp = (am_host_t *)datap;
1789 hostp->up = HOST_READY;
1792 errbuf = vstrallocf(_("Request to %s failed: %s"),
1793 hostp->hostname, security_geterror(sech));
1796 if (pkt->type == P_NAK) {
1798 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1801 goto NAK_parse_failed;
1803 skip_whitespace(s, ch);
1804 if(ch == '\0') goto NAK_parse_failed;
1806 if((s = strchr(remoterr, '\n')) != NULL) {
1807 if(s == remoterr) goto NAK_parse_failed;
1810 if (strcmp(remoterr, "unknown service: noop") != 0
1811 && strcmp(remoterr, "noop: invalid service") != 0) {
1812 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
1823 if(strncmp_const(line, "OPTIONS ") == 0) {
1824 t = strstr(line, "features=");
1825 if(t != NULL && (g_ascii_isspace((int)t[-1]) || t[-1] == ';')) {
1826 char *u = strchr(t, ';');
1829 t += SIZEOF("features=")-1;
1830 am_release_feature_set(hostp->features);
1831 if((hostp->features = am_string_to_feature(t)) == NULL) {
1832 errbuf = vstrallocf(hostp->hostname,
1833 _(": bad features value: %s\n"), line);
1839 skip_quoted_line(s, ch);
1844 if ((strncmp_const_skip(t, "ERROR ", t, tch) == 0) ||
1845 (strncmp_const_skip(t, "WARNING ", t, tch) == 0)) {
1847 skip_whitespace(t, tch);
1853 * If the "error" is that the "noop" service is unknown, it
1854 * just means the client is "old" (does not support the service).
1855 * We can ignore this.
1857 if(hostp->features == NULL
1858 && pkt->type == P_NAK
1859 && (strcmp(t - 1, "unknown service: noop") == 0
1860 || strcmp(t - 1, "noop: invalid service") == 0)) {
1861 skip_quoted_line(s, ch);
1865 if (t) /* truncate after the first line */
1867 errbuf = vstralloc(hostp->hostname,
1868 (pkt->type == P_NAK) ? "NAK " : "",
1877 skip_quoted_string(t, tch);
1879 disk = unquote_string(msg);
1881 skip_whitespace(t, tch);
1883 if (sscanf(t - 1, "%d", &level) != 1) {
1887 skip_integer(t, tch);
1888 skip_whitespace(t, tch);
1890 dp = lookup_hostdisk(hostp, disk);
1892 log_add(L_ERROR, _("%s: invalid reply from sendsize: `%s'\n"),
1893 hostp->hostname, line);
1898 if (strncmp_const(t-1,"SIZE ") == 0) {
1899 if (sscanf(t - 1, "SIZE %lld", &size_) != 1) {
1902 size = (gint64)size_;
1903 } else if ((strncmp_const(t-1,"ERROR ") == 0) ||
1904 (strncmp_const(t-1,"WARNING ") == 0)) {
1905 skip_non_whitespace(t, tch);
1906 skip_whitespace(t, tch);
1908 skip_quoted_string(t,tch);
1911 if (pkt->type == P_REP && !est(dp)->errstr) {
1912 est(dp)->errstr = unquote_string(msg);
1921 for (i = 0; i < MAX_LEVELS; i++) {
1922 if (est(dp)->estimate[i].level == level) {
1923 if (size == (gint64)-2) {
1924 est(dp)->estimate[i].nsize = -1; /* remove estimate */
1925 est(dp)->estimate[i].guessed = 0;
1926 } else if (size > (gint64)-1) {
1927 /* take the size returned by the client */
1928 est(dp)->estimate[i].nsize = size;
1929 est(dp)->estimate[i].guessed = 0;
1934 if (i == MAX_LEVELS) {
1935 goto bad_msg; /* this est wasn't requested */
1937 est(dp)->got_estimate++;
1941 skip_quoted_line(s, ch);
1944 if(hostp->up == HOST_READY && hostp->features == NULL) {
1946 * The client does not support the features list, so give it an
1949 dbprintf(_("no feature set from host %s\n"), hostp->hostname);
1950 hostp->features = am_set_default_feature_set();
1953 security_close_connection(sech, hostp->hostname);
1955 /* XXX what about disks that only got some estimates... do we care? */
1956 /* XXX amanda 2.1 treated that case as a bad msg */
1958 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1959 if(dp->todo == 0) continue;
1960 if(est(dp)->state != DISK_ACTIVE &&
1961 est(dp)->state != DISK_PARTIALY_DONE) continue;
1963 if(est(dp)->state == DISK_ACTIVE) {
1964 remove_disk(&waitq, dp);
1966 else if(est(dp)->state == DISK_PARTIALY_DONE) {
1967 remove_disk(&pestq, dp);
1970 if(pkt->type == P_REP) {
1971 est(dp)->state = DISK_DONE;
1973 else if(pkt->type == P_PREP) {
1974 est(dp)->state = DISK_PARTIALY_DONE;
1977 if (est(dp)->estimate[0].level == -1) continue; /* ignore this disk */
1980 qname = quote_string(dp->name);
1981 if(pkt->type == P_PREP) {
1982 g_fprintf(stderr,_("%s: time %s: got partial result for host %s disk %s:"),
1983 get_pname(), walltime_str(curclock()),
1984 dp->host->hostname, qname);
1985 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1986 est(dp)->estimate[0].level,
1987 (long long)est(dp)->estimate[0].nsize,
1988 est(dp)->estimate[1].level,
1989 (long long)est(dp)->estimate[1].nsize,
1990 est(dp)->estimate[2].level,
1991 (long long)est(dp)->estimate[2].nsize);
1992 enqueue_disk(&pestq, dp);
1994 else if(pkt->type == P_REP) {
1995 g_fprintf(stderr,_("%s: time %s: got result for host %s disk %s:"),
1996 get_pname(), walltime_str(curclock()),
1997 dp->host->hostname, qname);
1998 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1999 est(dp)->estimate[0].level,
2000 (long long)est(dp)->estimate[0].nsize,
2001 est(dp)->estimate[1].level,
2002 (long long)est(dp)->estimate[1].nsize,
2003 est(dp)->estimate[2].level,
2004 (long long)est(dp)->estimate[2].nsize);
2005 if ((est(dp)->estimate[0].level != -1 &&
2006 est(dp)->estimate[0].nsize > (gint64)0) ||
2007 (est(dp)->estimate[1].level != -1 &&
2008 est(dp)->estimate[1].nsize > (gint64)0) ||
2009 (est(dp)->estimate[2].level != -1 &&
2010 est(dp)->estimate[2].nsize > (gint64)0)) {
2012 for (i=MAX_LEVELS-1; i >=0; i--) {
2013 if (est(dp)->estimate[i].level != -1 &&
2014 est(dp)->estimate[i].nsize < (gint64)0) {
2015 est(dp)->estimate[i].level = -1;
2018 enqueue_disk(&estq, dp);
2021 enqueue_disk(&failq, dp);
2022 if(est(dp)->got_estimate && !est(dp)->errstr) {
2023 est(dp)->errstr = vstrallocf("disk %s, all estimate failed",
2028 _("error result for host %s disk %s: missing estimate\n"),
2029 dp->host->hostname, qname);
2030 if (est(dp)->errstr == NULL) {
2031 est(dp)->errstr = vstrallocf(_("missing result for %s in %s response"),
2032 qname, dp->host->hostname);
2036 hostp->up = HOST_DONE;
2038 if (est(dp)->post_dle == 0 &&
2039 (pkt->type == P_REP ||
2040 ((est(dp)->estimate[0].level == -1 ||
2041 est(dp)->estimate[0].nsize > (gint64)0) &&
2042 (est(dp)->estimate[1].level == -1 ||
2043 est(dp)->estimate[1].nsize > (gint64)0) &&
2044 (est(dp)->estimate[2].level == -1 ||
2045 est(dp)->estimate[2].nsize > (gint64)0)))) {
2046 run_server_scripts(EXECUTE_ON_POST_DLE_ESTIMATE,
2047 get_config_name(), dp,
2048 est(dp)->estimate[0].level);
2049 est(dp)->post_dle = 1;
2054 if(hostp->up == HOST_DONE) {
2055 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2057 if (pkt->type == P_REP) {
2058 run_server_scripts(EXECUTE_ON_POST_HOST_ESTIMATE,
2059 get_config_name(), dp,
2060 est(dp)->estimate[0].level);
2066 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2068 while(waitpid(-1, NULL, WNOHANG)> 0);
2073 errbuf = vstrallocf(_("%s NAK: [NAK parse failed]"), hostp->hostname);
2074 g_fprintf(stderr, _("got strange nak from %s:\n----\n%s----\n\n"),
2075 hostp->hostname, pkt->body);
2079 g_fprintf(stderr,_("got a bad message, stopped at:\n"));
2081 g_fprintf(stderr,_("----\n%s----\n\n"), line);
2082 errbuf = stralloc2(_("badly formatted response from "), hostp->hostname);
2087 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
2089 if(est(dp)->state == DISK_ACTIVE) {
2090 qname = quote_string(dp->name);
2091 est(dp)->state = DISK_DONE;
2092 remove_disk(&waitq, dp);
2093 enqueue_disk(&failq, dp);
2096 est(dp)->errstr = stralloc(errbuf);
2097 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
2098 dp->host->hostname, qname, errbuf);
2105 * If there were no disks involved, make sure the error gets
2108 log_add(L_ERROR, "%s", errbuf);
2110 hostp->up = HOST_DONE;
2112 /* try to clean up any defunct processes, since Amanda doesn't wait() for
2114 while(waitpid(-1, NULL, WNOHANG)> 0);
2121 * ========================================================================
2126 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
2127 static one_est_t *pick_inclevel(disk_t *dp);
2129 static void analyze_estimate(
2135 char *qname = quote_string(dp->name);
2139 g_fprintf(stderr, _("pondering %s:%s... "),
2140 dp->host->hostname, qname);
2141 g_fprintf(stderr, _("next_level0 %d last_level %d "),
2142 ep->next_level0, ep->last_level);
2144 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
2148 ep->degr_est = &default_one_est;
2150 if (ep->next_level0 <= 0 || (have_info && ep->last_level == 0
2151 && (info.command & FORCE_NO_BUMP))) {
2152 if (ep->next_level0 <= 0) {
2153 g_fprintf(stderr,_("(due for level 0) "));
2155 ep->dump_est = est_for_level(dp, 0);
2156 if (ep->dump_est->csize <= (gint64)0) {
2158 _("(no estimate for level 0, picking an incr level)\n"));
2159 ep->dump_est = pick_inclevel(dp);
2161 if (ep->dump_est->nsize == (gint64)-1) {
2162 ep->dump_est = est_for_level(dp, ep->dump_est->level + 1);
2166 total_lev0 += (double) ep->dump_est->csize;
2167 if(ep->last_level == -1 || dp->skip_incr) {
2168 g_fprintf(stderr,_("(%s disk, can't switch to degraded mode)\n"),
2169 dp->skip_incr? "skip-incr":_("new"));
2170 if (dp->skip_incr && ep->degr_mesg == NULL) {
2171 ep->degr_mesg = _("Skpping: skip-incr disk can't be dumped in degraded mode");
2173 ep->degr_est = &default_one_est;
2176 /* fill in degraded mode info */
2177 g_fprintf(stderr,_("(picking inclevel for degraded mode)"));
2178 ep->degr_est = pick_inclevel(dp);
2179 if (ep->degr_est->csize == (gint64)-1) {
2180 ep->degr_est = est_for_level(dp, ep->degr_est->level + 1);
2182 if (ep->degr_est->csize == (gint64)-1) {
2183 g_fprintf(stderr,_("(no inc estimate)"));
2184 if (ep->degr_mesg == NULL)
2185 ep->degr_mesg = _("Skipping: an incremental estimate could not be performed, so disk cannot be dumped in degraded mode");
2186 ep->degr_est = &default_one_est;
2188 g_fprintf(stderr,"\n");
2193 g_fprintf(stderr,_("(not due for a full dump, picking an incr level)\n"));
2194 /* XXX - if this returns -1, maybe we should force a total? */
2195 ep->dump_est = pick_inclevel(dp);
2197 if (ep->dump_est->csize == (gint64)-1) {
2198 ep->dump_est = est_for_level(dp, ep->last_level);
2200 if (ep->dump_est->csize == (gint64)-1) {
2201 ep->dump_est = est_for_level(dp, ep->last_level + 1);
2203 if (ep->dump_est->csize == (gint64)-1) {
2204 ep->dump_est = est_for_level(dp, 0);
2206 if (ep->degr_mesg == NULL) {
2207 ep->degr_mesg = _("Skipping: a full is not planned, so can't dump in degraded mode");
2211 g_fprintf(stderr,_(" curr level %d nsize %lld csize %lld "),
2212 ep->dump_est->level, (long long)ep->dump_est->nsize,
2213 (long long)ep->dump_est->csize);
2215 insert_disk(&schedq, dp, schedule_order);
2217 total_size += (gint64)tt_blocksize_kb + ep->dump_est->csize + tape_mark;
2219 /* update the balanced size */
2220 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2221 dp->strategy == DS_INCRONLY)) {
2224 lev0size = est_tape_size(dp, 0);
2225 if(lev0size == (gint64)-1) lev0size = ep->last_lev0size;
2227 balanced_size += (double)(lev0size / (gint64)runs_per_cycle);
2230 g_fprintf(stderr,_("total size %lld total_lev0 %1.0lf balanced-lev0size %1.0lf\n"),
2231 (long long)total_size, total_lev0, balanced_size);
2233 /* Log errstr even if the estimate succeeded */
2234 /* It can be an error from a script */
2235 if (est(dp)->errstr) {
2236 char *qerrstr = quote_string(est(dp)->errstr);
2237 /* Log only a warning if a server estimate is available */
2238 if (est(dp)->estimate[0].nsize > 0 ||
2239 est(dp)->estimate[1].nsize > 0 ||
2240 est(dp)->estimate[2].nsize > 0) {
2241 log_add(L_WARNING, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2242 planner_timestamp, qerrstr);
2244 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2245 planner_timestamp, qerrstr);
2253 static void handle_failed(
2256 char *errstr, *errstr1, *qerrstr;
2257 char *qname = quote_string(dp->name);
2259 errstr = est(dp)->errstr? est(dp)->errstr : _("hmm, no error indicator!");
2260 errstr1 = vstralloc("[",errstr,"]", NULL);
2261 qerrstr = quote_string(errstr1);
2264 g_fprintf(stderr, _("%s: FAILED %s %s %s 0 %s\n"),
2265 get_pname(), dp->host->hostname, qname, planner_timestamp, qerrstr);
2267 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2268 planner_timestamp, qerrstr);
2272 /* XXX - memory leak with *dp */
2277 * insert-sort by decreasing priority, then
2278 * by decreasing size within priority levels.
2281 static int schedule_order(
2288 diff = est(b)->dump_priority - est(a)->dump_priority;
2289 if(diff != 0) return diff;
2291 ldiff = est(b)->dump_est->csize - est(a)->dump_est->csize;
2292 if(ldiff < (gint64)0) return -1; /* XXX - there has to be a better way to dothis */
2293 if(ldiff > (gint64)0) return 1;
2298 static one_est_t *pick_inclevel(
2301 one_est_t *level0_est, *base_est, *bump_est;
2305 level0_est = est_for_level(dp, 0);
2306 base_est = est_for_level(dp, est(dp)->last_level);
2308 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2309 if (base_est->level == 0) {
2310 g_fprintf(stderr,_(" picklev: last night 0, so tonight level 1\n"));
2311 return est_for_level(dp, 1);
2314 /* if no-full option set, always do level 1 */
2315 if(dp->strategy == DS_NOFULL) {
2316 g_fprintf(stderr,_(" picklev: no-full set, so always level 1\n"));
2317 return est_for_level(dp, 1);
2320 /* if we didn't get an estimate, we can't do an inc */
2321 if (base_est->nsize == (gint64)-1) {
2322 bump_est = est_for_level(dp, base_est->level + 1);
2323 if (bump_est->nsize > (gint64)0) /* FORCE_BUMP */
2325 g_fprintf(stderr,_(" picklev: no estimate for level %d, so no incs\n"), base_est->level);
2329 thresh = bump_thresh(base_est->level, level0_est->nsize, dp->bumppercent,
2330 dp->bumpsize, dp->bumpmult);
2333 _(" pick: size %lld level %d days %d (thresh %lldK, %d days)\n"),
2334 (long long)base_est->nsize, base_est->level, est(dp)->level_days,
2335 (long long)thresh, dp->bumpdays);
2337 if(base_est->level == (DUMP_LEVELS - 1)
2338 || est(dp)->level_days < dp->bumpdays
2339 || base_est->nsize <= thresh)
2342 bump_est = est_for_level(dp, base_est->level + 1);
2344 if (bump_est->nsize == (gint64)-1)
2347 g_fprintf(stderr, _(" pick: next size %lld... "),
2348 (long long)bump_est->nsize);
2350 if (base_est->nsize - bump_est->nsize < thresh) {
2351 g_fprintf(stderr, _("not bumped\n"));
2355 qname = quote_string(dp->name);
2356 g_fprintf(stderr, _("BUMPED\n"));
2357 log_add(L_INFO, _("Incremental of %s:%s bumped to level %d."),
2358 dp->host->hostname, qname, bump_est->level);
2368 ** ========================================================================
2371 ** We have two strategies here:
2375 ** If we are trying to fit too much on the tape something has to go. We
2376 ** try to delay totals until tomorrow by converting them into incrementals
2377 ** and, if that is not effective enough, dropping incrementals altogether.
2378 ** While we are searching for the guilty dump (the one that is really
2379 ** causing the schedule to be oversize) we have probably trampled on a lot of
2380 ** innocent dumps, so we maintain a "before image" list and use this to
2381 ** put back what we can.
2383 ** 2. Promote dumps.
2385 ** We try to keep the amount of tape used by total dumps the same each night.
2386 ** If there is some spare tape in this run we have a look to see if any of
2387 ** tonight's incrementals could be promoted to totals and leave us with a
2388 ** more balanced cycle.
2391 static void delay_one_dump(disk_t *dp, int delete, ...);
2392 static int promote_highest_priority_incremental(void);
2393 static int promote_hills(void);
2395 /* delay any dumps that will not fit */
2396 static void delay_dumps(void)
2403 gint64 new_total; /* New total_size */
2404 char est_kb[20]; /* Text formatted dump size */
2405 int nb_forced_level_0;
2411 biq.head = biq.tail = NULL;
2414 ** 1. Delay dumps that are way oversize.
2416 ** Dumps larger than the size of the tapes we are using are just plain
2417 ** not going to fit no matter how many other dumps we drop. Delay
2418 ** oversize totals until tomorrow (by which time my owner will have
2419 ** resolved the problem!) and drop incrementals altogether. Naturally
2420 ** a large total might be delayed into a large incremental so these
2421 ** need to be checked for separately.
2424 for(dp = schedq.head; dp != NULL; dp = ndp) {
2425 int avail_tapes = 1;
2426 if (dp->tape_splitsize > (gint64)0)
2427 avail_tapes = conf_runtapes;
2429 ndp = dp->next; /* remove_disk zaps this */
2431 full_size = est_tape_size(dp, 0);
2432 if (full_size > tapetype_get_length(tape) * (gint64)avail_tapes) {
2433 char *qname = quote_string(dp->name);
2434 if (conf_runtapes > 1 && dp->tape_splitsize == (gint64)0) {
2435 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"
2436 ", you could define a splitsize"),
2437 dp->host->hostname, qname,
2438 (long long)full_size);
2440 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"),
2441 dp->host->hostname, qname,
2442 (long long)full_size);
2447 if (est(dp)->dump_est->csize == (gint64)-1 ||
2448 est(dp)->dump_est->csize <= tapetype_get_length(tape) * (gint64)avail_tapes) {
2452 /* Format dumpsize for messages */
2453 g_snprintf(est_kb, 20, "%lld KB,",
2454 (long long)est(dp)->dump_est->csize);
2456 if(est(dp)->dump_est->level == 0) {
2459 message = _("but cannot incremental dump skip-incr disk");
2461 else if(est(dp)->last_level < 0) {
2463 message = _("but cannot incremental dump new disk");
2465 else if(est(dp)->degr_est->level < 0) {
2467 message = _("but no incremental estimate");
2469 else if (est(dp)->degr_est->csize > tapetype_get_length(tape)) {
2471 message = _("incremental dump also larger than tape");
2475 message = _("full dump delayed");
2480 message = _("skipping incremental");
2482 delay_one_dump(dp, delete, _("dump larger than available tape space,"),
2483 est_kb, message, NULL);
2487 ** 2. Delay total dumps.
2489 ** Delay total dumps until tomorrow (or the day after!). We start with
2490 ** the lowest priority (most dispensable) and work forwards. We take
2491 ** care not to delay *all* the dumps since this could lead to a
2492 ** stalemate [for any one disk there are only three ways tomorrow's dump will
2493 ** be smaller than today's: 1. we do a level 0 today so tomorrow's dump
2494 ** will be a level 1; 2. the disk gets more data so that it is bumped
2495 ** tomorrow (this can be a slow process); and, 3. the disk loses some
2496 ** data (when does that ever happen?)].
2499 nb_forced_level_0 = 0;
2501 for(dp = schedq.head; dp != NULL && preserve == NULL; dp = dp->next)
2502 if(est(dp)->dump_est->level == 0)
2505 /* 2.a. Do not delay forced full */
2506 for(dp = schedq.tail;
2507 dp != NULL && total_size > tape_length;
2511 if(est(dp)->dump_est->level != 0) continue;
2513 get_info(dp->host->hostname, dp->name, &info);
2514 if(info.command & FORCE_FULL) {
2515 nb_forced_level_0 += 1;
2520 if(dp != preserve) {
2522 /* Format dumpsize for messages */
2523 g_snprintf(est_kb, 20, "%lld KB,",
2524 (long long)est(dp)->dump_est->csize);
2528 message = _("but cannot incremental dump skip-incr disk");
2530 else if(est(dp)->last_level < 0) {
2532 message = _("but cannot incremental dump new disk");
2534 else if(est(dp)->degr_est->level < 0) {
2536 message = _("but no incremental estimate");
2540 message = _("full dump delayed");
2542 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2547 /* 2.b. Delay forced full if needed */
2548 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2549 for(dp = schedq.tail;
2550 dp != NULL && total_size > tape_length;
2554 if(est(dp)->dump_est->level == 0 && dp != preserve) {
2556 /* Format dumpsize for messages */
2557 g_snprintf(est_kb, 20, "%lld KB,",
2558 (long long)est(dp)->dump_est->csize);
2562 message = _("but cannot incremental dump skip-incr disk");
2564 else if(est(dp)->last_level < 0) {
2566 message = _("but cannot incremental dump new disk");
2568 else if(est(dp)->degr_est->level < 0) {
2570 message = _("but no incremental estimate");
2574 message = _("full dump delayed");
2576 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2583 ** 3. Delay incremental dumps.
2585 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2586 ** at making things fit. Again, we start with the lowest priority (most
2587 ** dispensable) and work forwards.
2590 for(dp = schedq.tail;
2591 dp != NULL && total_size > tape_length;
2595 if(est(dp)->dump_est->level != 0) {
2597 /* Format dumpsize for messages */
2598 g_snprintf(est_kb, 20, "%lld KB,",
2599 (long long)est(dp)->dump_est->csize);
2601 delay_one_dump(dp, 1,
2602 _("dumps way too big,"),
2604 _("must skip incremental dumps"),
2610 ** 4. Reinstate delayed dumps.
2612 ** We might not have needed to stomp on all of the dumps we have just
2613 ** delayed above. Try to reinstate them all starting with the last one
2614 ** and working forwards. It is unlikely that the last one will fit back
2615 ** in but why complicate the code?
2618 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2619 int avail_tapes = 1;
2622 if(dp->tape_splitsize > (gint64)0)
2623 avail_tapes = conf_runtapes;
2626 new_total = total_size + (gint64)tt_blocksize_kb +
2627 bi->csize + (gint64)tape_mark;
2629 new_total = total_size - est(dp)->dump_est->csize + bi->csize;
2631 if((new_total <= tape_length) &&
2632 (bi->csize < (tapetype_get_length(tape) * (gint64)avail_tapes))) {
2634 total_size = new_total;
2636 if(bi->level == 0) {
2637 total_lev0 += (double) bi->csize;
2639 insert_disk(&schedq, dp, schedule_order);
2642 est(dp)->dump_est = est_for_level(dp, bi->level);
2646 if(bi->next == NULL)
2647 biq.tail = bi->prev;
2649 (bi->next)->prev = bi->prev;
2650 if(bi->prev == NULL)
2651 biq.head = bi->next;
2653 (bi->prev)->next = bi->next;
2661 ** 5. Output messages about what we have done.
2663 ** We can't output messages while we are delaying dumps because we might
2664 ** reinstate them later. We remember all the messages and output them
2668 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2671 g_fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2672 log_add(L_FAIL, "%s", bi->errstr);
2676 g_fprintf(stderr, _(" delay: %s now at level %d\n"),
2677 bi->errstr, est(dp)->dump_est->level);
2678 log_add(L_INFO, "%s", bi->errstr);
2686 g_fprintf(stderr, _(" delay: Total size now %lld.\n"),
2687 (long long)total_size);
2694 * Remove a dump or modify it from full to incremental.
2695 * Keep track of it on the bi q in case we can add it back later.
/*
 * delay_one_dump: take the dump currently planned for dp out of the
 * running totals and remember it in a bi_t record appended to biq,
 * together with a message assembled from the variadic string arguments.
 *
 * NOTE(review): this excerpt does not show every line of the function
 * (the parameter list, some declarations, braces and branch bodies are
 * not visible), so the comments below cover only the visible statements.
 */
2698 static void delay_one_dump,
2704 char level_str[NUM_STR_SIZE];
2707 char *qname = quote_string(dp->name);
2708 char *errstr, *qerrstr;
2710 arglist_start(argp, delete);
/* Remove this dump from total_size: tt_blocksize_kb + csize + tape_mark. */
2712 total_size -= (gint64)tt_blocksize_kb + est(dp)->dump_est->csize + (gint64)tape_mark;
/* A level 0 dump is also counted in total_lev0, so back it out there too. */
2713 if(est(dp)->dump_est->level == 0) {
2714 total_lev0 -= (double) est(dp)->dump_est->csize;
/* New bookkeeping record for the displaced dump. */
2717 bi = alloc(SIZEOF(bi_t));
/*
 * Link the record after the current tail of biq.
 * NOTE(review): the empty-list branch body and the tail update are on
 * lines not shown in this excerpt.
 */
2719 bi->prev = biq.tail;
2720 if(biq.tail == NULL)
2723 biq.tail->next = bi;
2726 bi->deleted = delete;
/* Snapshot the estimate that is being displaced. */
2728 bi->level = est(dp)->dump_est->level;
2729 bi->nsize = est(dp)->dump_est->nsize;
2730 bi->csize = est(dp)->dump_est->csize;
2732 g_snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_est->level);
2733 bi->errstr = vstralloc(dp->host->hostname,
2735 " ", planner_timestamp ? planner_timestamp : "?",
/* Consume the variadic strings until the NULL terminator, joining with sep. */
2740 while ((next = arglist_val(argp, char *)) != NULL) {
2741 vstrextend(&errstr, sep, next, NULL);
2744 strappend(errstr, "]");
/* The assembled reason is quoted before being appended to the record. */
2745 qerrstr = quote_string(errstr);
2746 vstrextend(&bi->errstr, " ", qerrstr, NULL);
/*
 * NOTE(review): presumably this removal is guarded by the delete flag and
 * the two statements after it form the alternative branch; the condition
 * lines are not visible here, so confirm against the full file.
 */
2752 remove_disk(&schedq, dp);
/* Switch to the degraded estimate and add its size back into total_size. */
2754 est(dp)->dump_est = est(dp)->degr_est;
2755 total_size += (gint64)tt_blocksize_kb + est(dp)->dump_est->csize + (gint64)tape_mark;
/*
 * promote_highest_priority_incremental: walk schedq, give every disk a
 * promote score, and switch the chosen disk to its level 0 estimate.
 * The comment inside the body states the contract: return 1 if a disk
 * was promoted, keep total_size correct, never exceed tape_length.
 *
 * NOTE(review): this excerpt omits a number of lines (counter increments,
 * continue statements, braces), so the comments below are limited to the
 * statements that are visible.
 */
2762 static int promote_highest_priority_incremental(void)
2764 disk_t *dp, *dp1, *dp_promote;
2765 gint64 new_total, new_lev0;
2767 int nb_today, nb_same_day, nb_today2;
2768 int nb_disk_today, nb_disk_same_day;
2772 * return 1 if did so; must update total_size correctly; must not
2773 * cause total_size to exceed tape_length
2777 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2778 one_est_t *level0_est = est_for_level(dp, 0);
/* Default score; it stays at this value for disks rejected below. */
2779 est(dp)->promote = -1000;
2781 if (level0_est->nsize <= (gint64)0)
2784 if(est(dp)->next_level0 <= 0)
2787 if(est(dp)->next_level0 > dp->maxpromoteday)
/* Projected totals if the current dump were swapped for the level 0. */
2790 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
2791 new_lev0 = (gint64)total_lev0 + level0_est->csize;
2796 nb_disk_same_day = 0;
/*
 * Compare every scheduled disk against dp: disks already at level 0
 * versus disks whose next_level0 matches dp, with a second pair of tests
 * restricted to the same hostname.
 * NOTE(review): the counter updates themselves are on lines not shown.
 */
2797 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2798 if(est(dp1)->dump_est->level == 0)
2800 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2802 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2803 if(est(dp1)->dump_est->level == 0)
2805 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2810 /* do not promote if overflow tape */
2811 if(new_total > tape_length)
2814 /* do not promote if overflow balanced size and something today */
2815 /* promote if nothing today */
2816 if((new_lev0 > (gint64)(balanced_size + balance_threshold)) &&
2817 (nb_disk_today > 0))
2820 /* do not promote if only one disk due that day and nothing today */
2821 if(nb_disk_same_day == 1 && nb_disk_today == 0)
/*
 * Score: with nb_today2 = nb_today squared, the score is
 * (nb_same_day - nb_today2) squared + conf_dumpcycle - next_level0 when
 * nb_same_day >= nb_today2, otherwise
 * -nb_today2 + conf_dumpcycle - next_level0.
 */
2824 nb_today2 = nb_today*nb_today;
2825 if(nb_today == 0 && nb_same_day > 1)
2828 if(nb_same_day >= nb_today2) {
2829 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2830 conf_dumpcycle - est(dp)->next_level0;
2833 est(dp)->promote = -nb_today2 +
2834 conf_dumpcycle - est(dp)->next_level0;
2837 qname = quote_string(dp->name);
/*
 * A disk is reported with the try prefix when there is no candidate yet
 * or its score is strictly higher than the current candidate.
 * NOTE(review): the assignment that records the new candidate is not
 * visible in this excerpt.
 */
2838 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2840 g_fprintf(stderr," try %s:%s %d %d %d = %d\n",
2841 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2844 g_fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2845 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2851 one_est_t *level0_est;
2853 level0_est = est_for_level(dp, 0);
2855 qname = quote_string(dp->name);
/* Recompute the totals for the disk that is actually being promoted. */
2856 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
2857 new_lev0 = (gint64)total_lev0 + level0_est->csize;
/* Commit the new totals. */
2859 total_size = new_total;
2860 total_lev0 = (double)new_lev0;
/* Remember how many days ahead the level 0 was due, for the log message. */
2861 check_days = est(dp)->next_level0;
/* The previous estimate becomes the degraded one; level 0 is now planned. */
2862 est(dp)->degr_est = est(dp)->dump_est;
2863 est(dp)->dump_est = level0_est;
2864 est(dp)->next_level0 = 0;
2867 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2868 dp->host->hostname, qname,
2869 total_lev0, (long long)total_size);
2872 plural(_("Full dump of %s:%s promoted from %d day ahead."),
2873 _("Full dump of %s:%s promoted from %d days ahead."),
2875 dp->host->hostname, qname, check_days);
/*
 * promote_hills: build per-day totals of upcoming level 0 dumps, pick the
 * day with the largest total that has more than one disk, and try to move
 * one disk from that day to a level 0 in this run.
 *
 * NOTE(review): this excerpt omits lines of the function (struct fields,
 * the outer search loop, continue and return statements, cleanup of sp),
 * so the comments below cover only the statements that are visible.
 */
2883 static int promote_hills(void)
2886 struct balance_stats {
2897 /* If we are already doing a level 0 don't bother */
2901 /* Do the guts of an "amadmin balance" */
/* One bucket per day of the dump cycle, capped at 10000 buckets. */
2902 my_dumpcycle = conf_dumpcycle;
2903 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
2905 sp = (struct balance_stats *)
2906 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
/* Clear every bucket before accumulating. */
2908 for(days = 0; days < my_dumpcycle; days++) {
2910 sp[days].size = (gint64)0;
/*
 * Add last_lev0size of each disk to the bucket for its next_level0 day.
 * Disks outside the bucket range, with skip_full set, or with strategy
 * DS_NOFULL or DS_INCRONLY are left out.
 */
2913 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2914 days = est(dp)->next_level0; /* This is > 0 by definition */
2915 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
2916 dp->strategy != DS_INCRONLY) {
2918 sp[days].size += est(dp)->last_lev0size;
2922 /* Search for a suitable big hill and cut it down */
2924 /* Find the tallest hill */
2925 hill_size = (gint64)0;
2926 for(days = 0; days < my_dumpcycle; days++) {
2927 if(sp[days].disks > 1 && sp[days].size > hill_size) {
2928 hill_size = sp[days].size;
2933 if(hill_size <= (gint64)0) break; /* no suitable hills */
2935 /* Find all the dumps in that hill and try and remove one */
2936 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2937 one_est_t *level0_est;
2938 if(est(dp)->next_level0 != hill_days ||
2939 est(dp)->next_level0 > dp->maxpromoteday ||
2941 dp->strategy == DS_NOFULL ||
2942 dp->strategy == DS_INCRONLY)
2944 level0_est = est_for_level(dp, 0);
2945 if (level0_est->nsize <= (gint64)0)
/*
 * Projected total if the level 0 replaces the currently planned dump.
 * NOTE(review): the action taken when it exceeds tape_length is on a
 * line not shown; presumably the disk is skipped.
 */
2947 new_total = total_size - est(dp)->dump_est->csize + level0_est->csize;
2948 if(new_total > tape_length)
2950 /* We found a disk we can promote */
2951 qname = quote_string(dp->name);
2952 total_size = new_total;
2953 total_lev0 += (double)level0_est->csize;
/* The previous estimate becomes the degraded one; level 0 is now planned. */
2954 est(dp)->degr_est = est(dp)->dump_est;
2955 est(dp)->dump_est = level0_est;
2956 est(dp)->next_level0 = 0;
2959 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2960 dp->host->hostname, qname,
2961 total_lev0, (long long)total_size);
2964 plural(_("Full dump of %s:%s specially promoted from %d day ahead."),
2965 _("Full dump of %s:%s specially promoted from %d days ahead."),
2967 dp->host->hostname, qname, hill_days);
2973 /* All the disks in that hill were unsuitable. */
2974 sp[hill_days].disks = 0; /* Don't get tricked again */
2982 * ========================================================================
2985 * XXX - memory leak - we shouldn't just throw away *dp
/*
 * output_scheduleline: format the schedule entry for one disk as a line
 * beginning with DUMP and write it to stdout and stderr. When the chosen
 * estimate has csize == -1 a FAILED message with [no estimate] is printed
 * and logged instead.
 *
 * NOTE(review): this excerpt omits lines of the function (the parameter
 * list, several vstralloc arguments, else lines, cleanup), so the comments
 * below cover only the statements that are visible.
 */
2987 static void output_scheduleline(
2991 time_t dump_time = 0, degr_time = 0;
2992 double dump_kps = 0, degr_kps = 0;
2993 char *schedline = NULL, *degr_str = NULL;
2994 char dump_priority_str[NUM_STR_SIZE];
2995 char dump_level_str[NUM_STR_SIZE];
2996 char dump_nsize_str[NUM_STR_SIZE];
2997 char dump_csize_str[NUM_STR_SIZE];
2998 char dump_time_str[NUM_STR_SIZE];
2999 char dump_kps_str[NUM_STR_SIZE];
3000 char degr_level_str[NUM_STR_SIZE];
3001 char degr_nsize_str[NUM_STR_SIZE];
3002 char degr_csize_str[NUM_STR_SIZE];
3003 char degr_time_str[NUM_STR_SIZE];
3004 char degr_kps_str[NUM_STR_SIZE];
3005 char *dump_date, *degr_date;
3007 char *qname = quote_string(dp->name);
3011 if(ep->dump_est->csize == (gint64)-1) {
3012 /* no estimate, fail the disk */
3014 _("%s: FAILED %s %s %s %d \"[no estimate]\"\n"),
3016 dp->host->hostname, qname, planner_timestamp, ep->dump_est->level);
3017 log_add(L_FAIL, _("%s %s %s %d [no estimate]"),
3018 dp->host->hostname, qname, planner_timestamp, ep->dump_est->level);
3023 dump_date = ep->dump_est->dumpdate;
3024 degr_date = ep->degr_est->dumpdate;
/*
 * Rates below 1.0 are replaced by DEFAULT_DUMPRATE, so the divisions
 * below never use a zero or near-zero rate.
 */
3026 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
/*
 * A level 0 dump is timed with fullrate and its degraded alternative with
 * incrrate; the other visible branch times the dump with incrrate.
 */
3028 if(ep->dump_est->level == 0) {
3029 dump_kps = fix_rate(ep->fullrate);
3030 dump_time = (time_t)((double)ep->dump_est->csize / dump_kps);
3032 if(ep->degr_est->csize != (gint64)-1) {
3033 degr_kps = fix_rate(ep->incrrate);
3034 degr_time = (time_t)((double)ep->degr_est->csize / degr_kps);
3038 dump_kps = fix_rate(ep->incrrate);
3039 dump_time = (time_t)((double)ep->dump_est->csize / dump_kps);
/*
 * Degraded-mode fields are formatted only for a level 0 dump that has a
 * degraded estimate (csize != -1).
 */
3042 if(ep->dump_est->level == 0 && ep->degr_est->csize != (gint64)-1) {
3043 g_snprintf(degr_level_str, sizeof(degr_level_str),
3044 "%d", ep->degr_est->level);
3045 g_snprintf(degr_nsize_str, sizeof(degr_nsize_str),
3046 "%lld", (long long)ep->degr_est->nsize);
3047 g_snprintf(degr_csize_str, sizeof(degr_csize_str),
3048 "%lld", (long long)ep->degr_est->csize);
3049 g_snprintf(degr_time_str, sizeof(degr_time_str),
3050 "%lld", (long long)degr_time);
3051 g_snprintf(degr_kps_str, sizeof(degr_kps_str),
3053 degr_str = vstralloc(" ", degr_level_str,
3055 " ", degr_nsize_str,
3056 " ", degr_csize_str,
/*
 * Alternative form of degr_str: a quoted message, taken from
 * ep->degr_mesg when set and from a fixed fallback text otherwise.
 * NOTE(review): the condition selecting this form is not visible here.
 */
3062 if (ep->degr_mesg) {
3063 degr_mesg = quote_string(ep->degr_mesg);
3065 degr_mesg = quote_string(_("Skipping: cannot dump in degraded mode for unknown reason"));
3067 degr_str = vstralloc(" ", degr_mesg, NULL);
/* Render the numeric fields of the planned dump as strings. */
3070 g_snprintf(dump_priority_str, SIZEOF(dump_priority_str),
3071 "%d", ep->dump_priority);
3072 g_snprintf(dump_level_str, SIZEOF(dump_level_str),
3073 "%d", ep->dump_est->level);
3074 g_snprintf(dump_nsize_str, sizeof(dump_nsize_str),
3075 "%lld", (long long)ep->dump_est->nsize);
3076 g_snprintf(dump_csize_str, sizeof(dump_csize_str),
3077 "%lld", (long long)ep->dump_est->csize);
3078 g_snprintf(dump_time_str, sizeof(dump_time_str),
3079 "%lld", (long long)dump_time);
3080 g_snprintf(dump_kps_str, sizeof(dump_kps_str),
3082 features = am_feature_to_string(dp->host->features);
/* Assemble the schedule line; degr_str is appended only when it was built. */
3083 schedline = vstralloc("DUMP ",dp->host->hostname,
3086 " ", planner_timestamp,
3087 " ", dump_priority_str,
3088 " ", dump_level_str,
3090 " ", dump_nsize_str,
3091 " ", dump_csize_str,
3094 degr_str ? degr_str : "",
/* Log a warning when the chosen estimate is flagged as guessed. */
3097 if (est(dp)->dump_est->guessed == 1) {
3098 log_add(L_WARNING, _("WARNING: no history available for %s:%s; guessing that size will be %lld KB\n"), dp->host->hostname, qname, (long long)est(dp)->dump_est->csize);
/* The same line is written to both stdout and stderr. */
3100 fputs(schedline, stdout);
3101 fputs(schedline, stderr);
3118 size = internal_server_estimate(dp, info, level, &stats);
3120 est(dp)->dump_est = &est(dp)->estimate[i];
3121 est(dp)->estimate[i].nsize = size;
3123 est(dp)->estimate[i].guessed = 1;