2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: planner.c,v 1.206 2006/08/10 23:57:27 paddy_s Exp $
29 * backup schedule planner for the Amanda backup system.
43 #include "amfeatures.h"
44 #include "server_util.h"
46 #include "timestamp.h"
48 #define planner_debug(i,x) do { \
49 if ((i) <= debug_planner) { \
54 #define MAX_LEVELS 3 /* max# of estimates per filesys */
56 #define RUNS_REDZONE 5 /* should be in conf file? */
58 #define PROMOTE_THRESHOLD 0.05 /* if <5% unbalanced, don't promote */
59 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
61 /* configuration file stuff */
64 off_t conf_maxdumpsize;
67 int conf_runspercycle;
72 int conf_usetimestamps;
74 #define HOST_READY ((void *)0) /* must be 0 */
75 #define HOST_ACTIVE ((void *)1)
76 #define HOST_DONE ((void *)2)
78 #define DISK_READY 0 /* must be 0 */
80 #define DISK_PARTIALY_DONE 2
83 typedef struct est_s {
88 off_t dump_nsize; /* native size */
89 off_t dump_csize; /* compressed size */
90 int degr_level; /* if dump_level == 0, what would be the inc level */
91 off_t degr_nsize; /* native degraded size */
92 off_t degr_csize; /* compressed degraded size */
98 double fullrate, incrrate;
99 double fullcomp, incrcomp;
101 int level[MAX_LEVELS];
102 char *dumpdate[MAX_LEVELS];
103 off_t est_size[MAX_LEVELS];
106 #define est(dp) ((est_t *)(dp)->up)
108 /* pestq = partial estimate */
109 disklist_t startq, waitq, pestq, estq, failq, schedq;
111 double total_lev0, balanced_size, balance_threshold;
117 size_t tt_blocksize_kb;
118 int runs_per_cycle = 0;
120 char *planner_timestamp = NULL;
122 static am_feature_t *our_features = NULL;
123 static char *our_feature_string = NULL;
125 /* We keep a LIFO queue of before images for all modifications made
126 * to schedq in our attempt to make the schedule fit on the tape.
127 * Enough information is stored to reinstate a dump if it turns out
128 * that it shouldn't have been touched after all.
130 typedef struct bi_s {
133 int deleted; /* 0=modified, 1=deleted */
134 disk_t *dp; /* The disk that was changed */
135 int level; /* The original level */
136 off_t nsize; /* The original native size */
137 off_t csize; /* The original compressed size */
138 char *errstr; /* A message describing why this disk is here */
141 typedef struct bilist_s {
145 bilist_t biq; /* The BI queue itself */
148 * ========================================================================
153 static void setup_estimate(disk_t *dp);
154 static void get_estimates(void);
155 static void analyze_estimate(disk_t *dp);
156 static void handle_failed(disk_t *dp);
157 static void delay_dumps(void);
158 static int promote_highest_priority_incremental(void);
159 static int promote_hills(void);
160 static void output_scheduleline(disk_t *dp);
161 int main(int, char **);
177 times_t section_start;
181 config_overwrites_t *cfg_ovr = NULL;
182 char *cfg_opt = NULL;
185 * Configure program for internationalization:
186 * 1) Only set the message locale for now.
187 * 2) Set textdomain for all amanda related programs to "amanda"
188 * We don't want to be forced to support dozens of message catalogs.
190 setlocale(LC_MESSAGES, "C");
191 textdomain("amanda");
193 /* drop root privileges */
194 if (!set_root_privs(0)) {
195 error(_("planner must be run setuid root"));
200 set_pname("planner");
202 dbopen(DBG_SUBDIR_SERVER);
204 cfg_ovr = extract_commandline_config_overwrites(&argc, &argv);
209 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD | CONFIG_INIT_FATAL,
211 apply_config_overwrites(cfg_ovr);
215 check_running_as(RUNNING_AS_DUMPUSER);
217 dbrename(config_name, DBG_SUBDIR_SERVER);
219 /* Don't die when child closes pipe */
220 signal(SIGPIPE, SIG_IGN);
222 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
224 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
225 set_logerror(logerror);
227 section_start = curclock();
229 our_features = am_init_feature_set();
230 our_feature_string = am_feature_to_string(our_features);
232 g_fprintf(stderr, _("%s: pid %ld executable %s version %s\n"),
233 get_pname(), (long) getpid(), argv[0], version());
234 for (i = 0; version_info[i] != NULL; i++)
235 g_fprintf(stderr, _("%s: %s"), get_pname(), version_info[i]);
238 if (argc > 3 && strcmp(argv[2], "--starttime") == 0) {
239 planner_timestamp = stralloc(argv[3]);
245 * 1. Networking Setup
258 * From this point on we are running under our real uid, so we don't
259 * have to worry about opening security holes below. Make sure we
263 if(getpwuid(getuid()) == NULL) {
264 error("can't get login name for my uid %ld", (long)getuid());
269 * 2. Read in Configuration Information
271 * All the Amanda configuration files are loaded before we begin.
274 g_fprintf(stderr,_("READING CONF INFO...\n"));
276 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
277 if (read_diskfile(conf_diskfile, &origq) < 0) {
278 error(_("could not load disklist \"%s\""), conf_diskfile);
281 if(origq.head == NULL) {
282 error(_("empty disklist \"%s\""), conf_diskfile);
286 errstr = match_disklist(&origq, argc-diskarg_offset,
287 argv+diskarg_offset);
289 g_fprintf(stderr,"%s",errstr);
293 for(dp = origq.head; dp != NULL; dp = dp->next) {
295 qname = quote_string(dp->name);
296 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
303 error(_("no DLE to backup"));
306 amfree(conf_diskfile);
308 conf_tapelist = config_dir_relative(getconf_str(CNF_TAPELIST));
309 if(read_tapelist(conf_tapelist)) {
310 error(_("could not load tapelist \"%s\""), conf_tapelist);
313 amfree(conf_tapelist);
315 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
316 if(open_infofile(conf_infofile)) {
317 error(_("could not open info db \"%s\""), conf_infofile);
320 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
321 log_add(L_WARNING, "problem copying infofile: %s", errstr);
324 amfree(conf_infofile);
326 conf_tapetype = getconf_str(CNF_TAPETYPE);
327 conf_maxdumpsize = getconf_am64(CNF_MAXDUMPSIZE);
328 conf_runtapes = getconf_int(CNF_RUNTAPES);
329 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
330 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
331 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
332 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
333 conf_reserve = getconf_int(CNF_RESERVE);
334 conf_autoflush = getconf_boolean(CNF_AUTOFLUSH);
335 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
338 if (planner_timestamp) {
339 if (conf_usetimestamps == 0) {
340 planner_timestamp[8] = '\0';
342 } else if(conf_usetimestamps == 0) {
343 planner_timestamp = get_datestamp_from_time(0);
346 planner_timestamp = get_timestamp_from_time(0);
348 log_add(L_START, _("date %s"), planner_timestamp);
349 g_printf("DATE %s\n", planner_timestamp);
351 g_fprintf(stderr, _("%s: timestamp %s\n"),
352 get_pname(), planner_timestamp);
354 /* some initializations */
356 if(conf_runspercycle == 0) {
357 runs_per_cycle = conf_dumpcycle;
358 } else if(conf_runspercycle == -1 ) {
359 runs_per_cycle = guess_runs_from_tapelist();
361 runs_per_cycle = conf_runspercycle;
363 if (runs_per_cycle <= 0) {
368 * do some basic sanity checking
370 if(conf_tapecycle <= runs_per_cycle) {
371 log_add(L_WARNING, _("tapecycle (%d) <= runspercycle (%d)"),
372 conf_tapecycle, runs_per_cycle);
375 tape = lookup_tapetype(conf_tapetype);
376 if(conf_maxdumpsize > (off_t)0) {
377 tape_length = (off_t)conf_maxdumpsize;
380 tape_length = tapetype_get_length(tape) * (off_t)conf_runtapes;
382 tape_mark = (size_t)tapetype_get_filemark(tape);
383 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
384 tt_blocksize = tt_blocksize_kb * 1024;
386 g_fprintf(stderr, _("%s: time %s: startup took %s secs\n"),
388 walltime_str(curclock()),
389 walltime_str(timessub(curclock(), section_start)));
392 * 3. Send autoflush dumps left on the holding disks
394 * This should give us something to do while we generate the new
398 g_fprintf(stderr,_("\nSENDING FLUSHES...\n"));
402 GSList *holding_list, *holding_file;
403 char *qdisk, *qhname;
405 /* get *all* flushable files in holding */
406 holding_list = holding_get_files_for_flush(NULL);
407 for(holding_file=holding_list; holding_file != NULL;
408 holding_file = holding_file->next) {
409 holding_file_get_dumpfile((char *)holding_file->data, &file);
411 if (holding_file_size((char *)holding_file->data, 1) <= 0) {
412 log_add(L_INFO, "%s: removing file with no data.",
413 (char *)holding_file->data);
414 holding_file_unlink((char *)holding_file->data);
418 qdisk = quote_string(file.disk);
419 qhname = quote_string((char *)holding_file->data);
420 log_add(L_DISK, "%s %s", file.name, qdisk);
422 "FLUSH %s %s %s %d %s\n",
429 "FLUSH %s %s %s %d %s\n",
438 g_slist_free_full(holding_list);
441 g_fprintf(stderr, _("ENDFLUSH\n"));
442 g_fprintf(stdout, _("ENDFLUSH\n"));
446 * 4. Calculate Preliminary Dump Levels
448 * Before we can get estimates from the remote slave hosts, we make a
449 * first attempt at guessing what dump levels we will be dumping at
450 * based on the curinfo database.
453 g_fprintf(stderr,_("\nSETTING UP FOR ESTIMATES...\n"));
454 section_start = curclock();
456 startq.head = startq.tail = NULL;
457 while(!empty(origq)) {
458 disk_t *dp = dequeue_disk(&origq);
464 g_fprintf(stderr, _("%s: time %s: setting up estimates took %s secs\n"),
466 walltime_str(curclock()),
467 walltime_str(timessub(curclock(), section_start)));
471 * 5. Get Dump Size Estimates from Remote Client Hosts
473 * Each host is queried (in parallel) for dump size information on all
474 * of its disks, and the results gathered as they come in.
477 /* go out and get the dump estimates */
479 g_fprintf(stderr,_("\nGETTING ESTIMATES...\n"));
480 section_start = curclock();
482 estq.head = estq.tail = NULL;
483 pestq.head = pestq.tail = NULL;
484 waitq.head = waitq.tail = NULL;
485 failq.head = failq.tail = NULL;
489 g_fprintf(stderr, _("%s: time %s: getting estimates took %s secs\n"),
491 walltime_str(curclock()),
492 walltime_str(timessub(curclock(), section_start)));
495 * At this point, all disks with estimates are in estq, and
496 * all the disks on hosts that didn't respond to our inquiry
500 dump_queue("FAILED", failq, 15, stderr);
501 dump_queue("DONE", estq, 15, stderr);
505 * 6. Analyze Dump Estimates
507 * Each disk's estimates are looked at to determine what level it
508 * should dump at, and to calculate the expected size and time taking
509 * historical dump rates and compression ratios into account. The
510 * total expected size is accumulated as well.
513 g_fprintf(stderr,_("\nANALYZING ESTIMATES...\n"));
514 section_start = curclock();
516 /* an empty tape still has a label and an endmark */
517 total_size = ((off_t)tt_blocksize_kb + (off_t)tape_mark) * (off_t)2;
521 schedq.head = schedq.tail = NULL;
522 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
523 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
526 * At this point, all the disks are on schedq sorted by priority.
527 * The total estimated size of the backups is in total_size.
533 g_fprintf(stderr, _("INITIAL SCHEDULE (size %lld):\n"),
534 (long long)total_size);
535 for(dp = schedq.head; dp != NULL; dp = dp->next) {
536 qname = quote_string(dp->name);
537 g_fprintf(stderr, _(" %s %s pri %d lev %d nsize %lld csize %lld\n"),
538 dp->host->hostname, qname, est(dp)->dump_priority,
540 (long long)est(dp)->dump_nsize,
541 (long long)est(dp)->dump_csize);
548 * 7. Delay Dumps if Schedule Too Big
550 * If the generated schedule is too big to fit on the tape, we need to
551 * delay some full dumps to make room. Incrementals will be done
552 * instead (except for new or forced disks).
554 * In extreme cases, delaying all the full dumps is not even enough.
555 * If so, some low-priority incrementals will be skipped completely
556 * until the dumps fit on the tape.
559 g_fprintf(stderr, _("\nDELAYING DUMPS IF NEEDED, total_size %lld, tape length %lld mark %zu\n"),
560 (long long)total_size,
561 (long long)tape_length,
564 initial_size = total_size;
568 /* XXX - why bother checking this? */
569 if(empty(schedq) && total_size < initial_size) {
570 error(_("cannot fit anything on tape, bailing out"));
576 * 8. Promote Dumps if Schedule Too Small
578 * Amanda attempts to balance the full dumps over the length of the
579 * dump cycle. If this night's full dumps are too small relative to
580 * the other nights, promote some high-priority full dumps that will be
581 * due for the next run, to full dumps for tonight, taking care not to
582 * overflow the tape size.
584 * This doesn't work too well for small sites. For these we scan ahead
585 * looking for nights that have an excessive number of dumps and promote
588 * Amanda never delays full dumps just for the sake of balancing the
589 * schedule, so it can take a full cycle to balance the schedule after
594 _("\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n"),
595 total_lev0, balanced_size);
597 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
599 while((balanced_size - total_lev0) > balance_threshold && moved_one)
600 moved_one = promote_highest_priority_incremental();
602 moved_one = promote_hills();
604 g_fprintf(stderr, _("%s: time %s: analysis took %s secs\n"),
606 walltime_str(curclock()),
607 walltime_str(timessub(curclock(), section_start)));
610 /* done with privileged ops, make sure root privilege is dropped */
611 if ( geteuid() == 0 ) {
619 * The schedule goes to stdout, presumably to driver. A copy is written
620 * on stderr for the debug file.
623 g_fprintf(stderr,_("\nGENERATING SCHEDULE:\n--------\n"));
625 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
626 g_fprintf(stderr, _("--------\n"));
629 log_add(L_FINISH, _("date %s time %s"), planner_timestamp, walltime_str(curclock()));
632 amfree(planner_timestamp);
633 amfree(our_feature_string);
634 am_release_feature_set(our_features);
645 * ========================================================================
646 * SETUP FOR ESTIMATES
650 static void askfor(est_t *, int, int, info_t *);
651 static int last_level(info_t *info); /* subroutines */
652 static off_t est_size(disk_t *dp, int level);
653 static off_t est_tape_size(disk_t *dp, int level);
654 static int next_level0(disk_t *dp, info_t *info);
655 static int runs_at(info_t *info, int lev);
656 static off_t bump_thresh(int level, off_t size_level_0, int bumppercent, off_t bumpsize, double bumpmult);
657 static int when_overwrite(char *label);
660 est_t *ep, /* estimate data block */
661 int seq, /* sequence number of request */
662 int lev, /* dump level being requested */
663 info_t *info) /* info block for disk */
665 if(seq < 0 || seq >= MAX_LEVELS) {
666 error(_("error [planner askfor: seq out of range 0..%d: %d]"),
670 if(lev < -1 || lev >= DUMP_LEVELS) {
671 error(_("error [planner askfor: lev out of range -1..%d: %d]"),
678 ep->dumpdate[seq] = (char *)0;
679 ep->est_size[seq] = (off_t)-2;
683 ep->level[seq] = lev;
685 ep->dumpdate[seq] = stralloc(get_dumpdate(info,lev));
687 ep->est_size[seq] = (off_t)-2;
702 assert(dp && dp->host);
704 qname = quote_string(dp->name);
705 g_fprintf(stderr, _("%s: time %s: setting up estimates for %s:%s\n"),
706 get_pname(), walltime_str(curclock()),
707 dp->host->hostname, qname);
709 /* get current information about disk */
711 if(get_info(dp->host->hostname, dp->name, &info)) {
712 /* no record for this disk, make a note of it */
713 log_add(L_INFO, _("Adding new disk %s:%s."), dp->host->hostname, dp->name);
716 /* setup working data struct for disk */
718 ep = alloc(SIZEOF(est_t));
719 dp->up = (void *) ep;
720 ep->state = DISK_READY;
721 ep->dump_nsize = (off_t)-1;
722 ep->dump_csize = (off_t)-1;
723 ep->dump_priority = dp->priority;
727 /* calculated fields */
729 if (ISSET(info.command, FORCE_FULL)) {
730 /* force a level 0, kind of like a new disk */
731 if(dp->strategy == DS_NOFULL) {
733 * XXX - Not sure what it means to force a no-full disk. The
734 * purpose of no-full is to just dump changes relative to a
735 * stable base, for example root partitions that vary only
736 * slightly from a site-wide prototype. Only the variations
739 * If we allow a level 0 onto the Amanda cycle, then we are
740 * hosed when that tape gets re-used next. Disallow this for
744 _("Cannot force full dump of %s:%s with no-full option."),
745 dp->host->hostname, qname);
747 /* clear force command */
748 CLR(info.command, FORCE_FULL);
749 if(put_info(dp->host->hostname, dp->name, &info)) {
750 error(_("could not put info record for %s:%s: %s"),
751 dp->host->hostname, qname, strerror(errno));
754 ep->last_level = last_level(&info);
755 ep->next_level0 = next_level0(dp, &info);
759 ep->next_level0 = -conf_dumpcycle;
760 log_add(L_INFO, _("Forcing full dump of %s:%s as directed."),
761 dp->host->hostname, qname);
764 else if(dp->strategy == DS_NOFULL) {
765 /* force estimate of level 1 */
767 ep->next_level0 = next_level0(dp, &info);
770 ep->last_level = last_level(&info);
771 ep->next_level0 = next_level0(dp, &info);
774 /* adjust priority levels */
776 /* warn if dump will be overwritten */
777 if (ep->last_level > -1 && strlen(info.inf[0].label) > 0) {
778 overwrite_runs = when_overwrite(info.inf[0].label);
779 if(overwrite_runs == 0) {
780 log_add(L_WARNING, _("Last full dump of %s:%s "
781 "on tape %s overwritten on this run."),
782 dp->host->hostname, qname, info.inf[0].label);
783 } else if(overwrite_runs <= RUNS_REDZONE) {
785 plural(_("Last full dump of %s:%s on tape %s overwritten in %d run."),
786 _("Last full dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
787 dp->host->hostname, qname, info.inf[0].label,
792 /* warn if last level 1 will be overwritten */
793 if (ep->last_level > 1 && strlen(info.inf[1].label) > 0) {
794 overwrite_runs = when_overwrite(info.inf[1].label);
795 if(overwrite_runs == 0) {
796 log_add(L_WARNING, _("Last level 1 dump of %s:%s "
797 "on tape %s overwritten on this run, resetting to level 1"),
798 dp->host->hostname, qname, info.inf[1].label);
800 } else if(overwrite_runs <= RUNS_REDZONE) {
802 plural(_("Last level 1 dump of %s:%s on tape %s overwritten in %d run."),
803 _("Last level 1 dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
804 dp->host->hostname, qname, info.inf[1].label,
809 if(ep->next_level0 < 0) {
810 g_fprintf(stderr,plural(_("%s:%s overdue %d day for level 0\n"),
811 _("%s:%s overdue %d days for level 0\n"),
813 dp->host->hostname, qname, (-ep->next_level0));
814 ep->dump_priority -= ep->next_level0;
816 else if (ISSET(info.command, FORCE_FULL))
817 ep->dump_priority += 1;
818 /* else XXX bump up the priority of incrementals that failed last night */
820 /* handle external level 0 dumps */
822 if(dp->skip_full && dp->strategy != DS_NOINC) {
823 if(ep->next_level0 <= 0) {
824 /* update the date field */
825 info.inf[0].date = today;
826 CLR(info.command, FORCE_FULL);
827 ep->next_level0 += conf_dumpcycle;
829 if(put_info(dp->host->hostname, dp->name, &info)) {
830 error(_("could not put info record for %s:%s: %s"),
831 dp->host->hostname, qname, strerror(errno));
834 log_add(L_INFO, _("Skipping full dump of %s:%s today."),
835 dp->host->hostname, qname);
836 g_fprintf(stderr,_("%s:%s lev 0 skipped due to skip-full flag\n"),
837 dp->host->hostname, qname);
838 /* don't enqueue the disk */
839 askfor(ep, 0, -1, &info);
840 askfor(ep, 1, -1, &info);
841 askfor(ep, 2, -1, &info);
842 g_fprintf(stderr, _("%s: SKIPPED %s %s 0 [skip-full]\n"),
843 get_pname(), dp->host->hostname, qname);
844 log_add(L_SUCCESS, _("%s %s %s 0 [skipped: skip-full]"),
845 dp->host->hostname, qname, planner_timestamp);
850 if(ep->last_level == -1) {
851 /* probably a new disk, but skip-full means no full! */
855 if(ep->next_level0 == 1) {
856 log_add(L_WARNING, _("Skipping full dump of %s:%s tomorrow."),
857 dp->host->hostname, qname);
861 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info.command, FORCE_FULL)) {
862 /* don't enqueue the disk */
863 askfor(ep, 0, -1, &info);
864 askfor(ep, 1, -1, &info);
865 askfor(ep, 2, -1, &info);
866 log_add(L_FAIL, _("%s %s 19000101 1 [Skipping incronly because no full dump were done]"),
867 dp->host->hostname, qname);
868 g_fprintf(stderr,_("%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n"),
869 dp->host->hostname, qname);
874 /* handle "skip-incr" type archives */
876 if(dp->skip_incr && ep->next_level0 > 0) {
877 g_fprintf(stderr,_("%s:%s lev 1 skipped due to skip-incr flag\n"),
878 dp->host->hostname, qname);
879 /* don't enqueue the disk */
880 askfor(ep, 0, -1, &info);
881 askfor(ep, 1, -1, &info);
882 askfor(ep, 2, -1, &info);
884 g_fprintf(stderr, _("%s: SKIPPED %s %s 1 [skip-incr]\n"),
885 get_pname(), dp->host->hostname, qname);
887 log_add(L_SUCCESS, _("%s %s %s 1 [skipped: skip-incr]"),
888 dp->host->hostname, qname, planner_timestamp);
893 if( ep->last_level == -1 && ep->next_level0 > 0 &&
894 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
895 conf_reserve == 100) {
896 log_add(L_WARNING, _("%s:%s mismatch: no tapelist record, "
897 "but curinfo next_level0: %d."),
898 dp->host->hostname, qname, ep->next_level0);
902 if(ep->last_level == 0) ep->level_days = 0;
903 else ep->level_days = runs_at(&info, ep->last_level);
904 ep->last_lev0size = info.inf[0].csize;
906 ep->fullrate = perf_average(info.full.rate, 0.0);
907 ep->incrrate = perf_average(info.incr.rate, 0.0);
909 ep->fullcomp = perf_average(info.full.comp, dp->comprate[0]);
910 ep->incrcomp = perf_average(info.incr.comp, dp->comprate[1]);
912 /* determine which estimates to get */
916 if (dp->strategy == DS_NOINC ||
918 (!ISSET(info.command, FORCE_BUMP) ||
920 ep->last_level == -1))) {
921 if(info.command & FORCE_BUMP && ep->last_level == -1) {
923 _("Remove force-bump command of %s:%s because it's a new disk."),
924 dp->host->hostname, qname);
926 switch (dp->strategy) {
929 askfor(ep, i++, 0, &info);
931 log_add(L_INFO, _("Ignoring skip_full for %s:%s "
932 "because the strategy is NOINC."),
933 dp->host->hostname, qname);
935 if(info.command & FORCE_BUMP) {
937 _("Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC."),
938 dp->host->hostname, qname);
947 if (ISSET(info.command, FORCE_FULL))
953 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
954 if(ep->last_level == -1) { /* a new disk */
955 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
956 askfor(ep, i++, 1, &info);
958 assert(!dp->skip_full); /* should be handled above */
960 } else { /* not new, pick normally */
963 curr_level = ep->last_level;
965 if (ISSET(info.command, FORCE_NO_BUMP)) {
966 if(curr_level > 0) { /* level 0 already asked for */
967 askfor(ep, i++, curr_level, &info);
969 log_add(L_INFO,_("Preventing bump of %s:%s as directed."),
970 dp->host->hostname, qname);
971 } else if (ISSET(info.command, FORCE_BUMP)
972 && curr_level + 1 < DUMP_LEVELS) {
973 askfor(ep, i++, curr_level+1, &info);
974 log_add(L_INFO,_("Bumping of %s:%s at level %d as directed."),
975 dp->host->hostname, qname, curr_level+1);
976 } else if (curr_level == 0) {
977 askfor(ep, i++, 1, &info);
979 askfor(ep, i++, curr_level, &info);
981 * If last time we dumped less than the threshold, then this
982 * time we will too, OR the extra size will be charged to both
983 * cur_level and cur_level + 1, so we will never bump. Also,
984 * if we haven't been at this level 2 days, or the dump failed
985 * last night, we can't bump.
987 if((info.inf[curr_level].size == (off_t)0 || /* no data, try it anyway */
988 (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
989 && ep->level_days >= dp->bumpdays))
990 && curr_level + 1 < DUMP_LEVELS) {
991 askfor(ep, i++, curr_level+1, &info);
997 while(i < MAX_LEVELS) /* mark end of estimates */
998 askfor(ep, i++, -1, &info);
1002 g_fprintf(stderr, _("setup_estimate: %s:%s: command %u, options: %s "
1003 "last_level %d next_level0 %d level_days %d getting estimates "
1004 "%d (%lld) %d (%lld) %d (%lld)\n"),
1005 dp->host->hostname, qname, info.command,
1006 dp->strategy == DS_NOFULL ? "no-full" :
1007 dp->strategy == DS_INCRONLY ? "incr-only" :
1008 dp->skip_full ? "skip-full" :
1009 dp->skip_incr ? "skip-incr" : "none",
1010 ep->last_level, ep->next_level0, ep->level_days,
1011 ep->level[0], (long long)ep->est_size[0],
1012 ep->level[1], (long long)ep->est_size[1],
1013 ep->level[2], (long long)ep->est_size[2]);
1015 assert(ep->level[0] != -1);
1016 enqueue_disk(&startq, dp);
1020 static int when_overwrite(
1026 runtapes = conf_runtapes;
1027 if(runtapes == 0) runtapes = 1;
1029 if((tp = lookup_tapelabel(label)) == NULL)
1030 return 1; /* "shouldn't happen", but trigger warning message */
1031 else if(tp->reuse == 0)
1033 else if(lookup_nb_tape() > conf_tapecycle)
1034 return (lookup_nb_tape() - tp->position) / runtapes;
1036 return (conf_tapecycle - tp->position) / runtapes;
1039 /* Return the estimated size for a particular dump */
1040 static off_t est_size(
1046 for(i = 0; i < MAX_LEVELS; i++) {
1047 if(level == est(dp)->level[i])
1048 return est(dp)->est_size[i];
1053 /* Return the estimated on-tape size of a particular dump */
1054 static off_t est_tape_size(
1061 size = est_size(dp, level);
1063 if(size == (off_t)-1) return size;
1065 if(dp->compress == COMP_NONE)
1068 if(level == 0) ratio = est(dp)->fullcomp;
1069 else ratio = est(dp)->incrcomp;
1072 * make sure over-inflated compression ratios don't throw off the
1073 * estimates, this is mostly for when you have a small dump getting
1074 * compressed which takes up a lot more disk/tape space relatively due
1075 * to the overhead of the compression. This is specifically for
1076 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1077 * (RUG@USM.Uni-Muenchen.DE)
1080 if(ratio > 1.1) ratio = 1.1;
1082 size = (off_t)((double)size * ratio);
1085 * Ratio can be very small in some error situations, so make sure
1086 * size goes back greater than zero. It may not be right, but
1087 * indicates we did get an estimate.
1089 if(size <= (off_t)0) {
1097 /* what was the level of the last successful dump to tape? */
1098 static int last_level(
1101 int min_pos, min_level, i;
1102 time_t lev0_date, last_date;
1105 if(info->last_level != -1)
1106 return info->last_level;
1108 /* to keep compatibility with old infofile */
1109 min_pos = 1000000000;
1113 for(i = 0; i < 9; i++) {
1114 if(conf_reserve < 100) {
1115 if(i == 0) lev0_date = info->inf[0].date;
1116 else if(info->inf[i].date < lev0_date) continue;
1117 if(info->inf[i].date > last_date) {
1118 last_date = info->inf[i].date;
1123 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1124 /* cull any entries from previous cycles */
1125 if(i == 0) lev0_date = info->inf[0].date;
1126 else if(info->inf[i].date < lev0_date) continue;
1128 if(tp->position < min_pos) {
1129 min_pos = tp->position;
1134 info->last_level = i;
1138 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc*/
1144 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1145 return 1; /* fake it */
1146 else if (dp->strategy == DS_NOINC)
1148 else if(info->inf[0].date < (time_t)0)
1149 return -days_diff(EPOCH, today); /* new disk */
1151 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1154 /* how many runs at current level? */
1159 tape_t *cur_tape, *old_tape;
1162 last = last_level(info);
1163 if(lev != last) return 0;
1164 if(lev == 0) return 1;
1166 if(info->consecutive_runs != -1)
1167 return info->consecutive_runs;
1169 /* to keep compatibility with old infofile */
1170 cur_tape = lookup_tapelabel(info->inf[lev].label);
1171 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1172 if(cur_tape == NULL || old_tape == NULL) return 0;
1174 if(conf_runtapes == 0)
1175 nb_runs = (old_tape->position - cur_tape->position) / 1;
1177 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1178 info->consecutive_runs = nb_runs;
1184 static off_t bump_thresh(
1193 if ((bumppercent != 0) && (size_level_0 > (off_t)1024)) {
1194 bump = ((double)size_level_0 * (double)bumppercent) / 100.0;
1197 bump = (double)bumpsize;
1199 while(--level) bump = bump * bumpmult;
1207 * ========================================================================
1208 * GET REMOTE DUMP SIZE ESTIMATES
1212 static void getsize(am_host_t *hostp);
1213 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
1214 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
1217 static void get_estimates(void)
1221 int something_started;
1223 something_started = 1;
1224 while(something_started) {
1225 something_started = 0;
1226 for(dp = startq.head; dp != NULL; dp = dp->next) {
1228 if(hostp->up == HOST_READY) {
1229 something_started = 1;
1233 * dp is no longer on startq, so dp->next is not valid
1234 * and we have to start all over.
1242 while(!empty(waitq)) {
1243 disk_t *dp = dequeue_disk(&waitq);
1244 est(dp)->errstr = _("hmm, disk was stranded on waitq");
1245 enqueue_disk(&failq, dp);
1248 while(!empty(pestq)) {
1249 disk_t *dp = dequeue_disk(&pestq);
1250 char * qname = quote_string(dp->name);
1252 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1253 if(est(dp)->est_size[0] == (off_t)-1) {
1254 log_add(L_WARNING, _("disk %s:%s, estimate of level %d failed."),
1255 dp->host->hostname, qname, est(dp)->level[0]);
1259 _("disk %s:%s, estimate of level %d timed out."),
1260 dp->host->hostname, qname, est(dp)->level[0]);
1262 est(dp)->level[0] = -1;
1265 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1266 if(est(dp)->est_size[1] == (off_t)-1) {
1268 _("disk %s:%s, estimate of level %d failed."),
1269 dp->host->hostname, qname, est(dp)->level[1]);
1273 _("disk %s:%s, estimate of level %d timed out."),
1274 dp->host->hostname, qname, est(dp)->level[1]);
1276 est(dp)->level[1] = -1;
1279 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1280 if(est(dp)->est_size[2] == (off_t)-1) {
1282 _("disk %s:%s, estimate of level %d failed."),
1283 dp->host->hostname, qname, est(dp)->level[2]);
1287 _("disk %s:%s, estimate of level %d timed out."),
1288 dp->host->hostname, qname, est(dp)->level[2]);
1290 est(dp)->level[2] = -1;
1293 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1294 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1295 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1296 enqueue_disk(&estq, dp);
1299 est(dp)->errstr = vstralloc("disk ", qname,
1300 _(", all estimate timed out"), NULL);
1301 enqueue_disk(&failq, dp);
/*
 * getsize: build and send one estimate request for the host hostp.
 *
 * When the feature list of the host is already known (hostp->features is
 * not NULL) a sendsize request is assembled for its DISK_READY disks;
 * otherwise a noop request is sent first so the client can report its
 * features.  The request is handed to protocol_sendreq() with
 * handle_result as the reply callback and hostp as its argument.
 * Disks using ES_SERVER are not sent to the client at all: their sizes
 * are derived here from the stored info record.
 */
1307 static void getsize(
1310 char number[NUM_STR_SIZE], *req;
1313 time_t estimates, timeout;
1315 const security_driver_t *secdrv;
1321 assert(hostp->disks != NULL);
1323 if(hostp->up != HOST_READY) {
1328 * The first time through here we send a "noop" request. This will
1329 * return the feature list from the client if it supports that.
1330 * If it does not, handle_result() will set the feature list to an
1331 * empty structure. In either case, we do the disks on the second
1332 * (and subsequent) pass(es).
1334 if(hostp->features != NULL) { /* sendsize service */
1338 int has_features = am_has_feature(hostp->features,
1339 fe_req_options_features);
1340 int has_hostname = am_has_feature(hostp->features,
1341 fe_req_options_hostname);
1342 int has_maxdumps = am_has_feature(hostp->features,
1343 fe_req_options_maxdumps);
1344 int has_config = am_has_feature(hostp->features,
1345 fe_req_options_config);
/* each request option below is emitted only if the client advertises the matching feature */
1347 g_snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
1348 req = vstralloc("SERVICE ", "sendsize", "\n",
1350 has_features ? "features=" : "",
1351 has_features ? our_feature_string : "",
1352 has_features ? ";" : "",
1353 has_maxdumps ? "maxdumps=" : "",
1354 has_maxdumps ? number : "",
1355 has_maxdumps ? ";" : "",
1356 has_hostname ? "hostname=" : "",
1357 has_hostname ? hostp->hostname : "",
1358 has_hostname ? ";" : "",
1359 has_config ? "config=" : "",
1360 has_config ? config_name : "",
1361 has_config ? ";" : "",
1364 req_len = strlen(req);
1365 req_len += 128; /* room for SECURITY ... */
1367 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
/* only disks that are selected (todo != 0) and still DISK_READY are handled */
1371 if(dp->todo == 0) continue;
1373 if(est(dp)->state != DISK_READY) continue;
1375 est(dp)->got_estimate = 0;
/* no level was requested for this disk, so it is marked DISK_DONE */
1376 if(est(dp)->level[0] == -1) {
1377 est(dp)->state = DISK_DONE;
1381 qname = quote_string(dp->name);
1382 qdevice = quote_string(dp->device);
1383 if(dp->estimate == ES_CLIENT ||
1384 dp->estimate == ES_CALCSIZE) {
1387 for(i = 0; i < MAX_LEVELS; i++) {
1389 char *exclude1 = "";
1390 char *exclude2 = "";
1391 char *excludefree = NULL;
1392 char *include1 = "";
1393 char *include2 = "";
1394 char *includefree = NULL;
1395 char spindle[NUM_STR_SIZE];
1396 char level[NUM_STR_SIZE];
1397 int lev = est(dp)->level[i];
1399 if(lev == -1) break;
1401 g_snprintf(level, SIZEOF(level), "%d", lev);
1402 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1403 if(am_has_feature(hostp->features,fe_sendsize_req_options)){
1404 exclude1 = " OPTIONS |";
1405 exclude2 = optionstr(dp, hostp->features, NULL);
1406 if ( exclude2 == NULL ) {
1407 error(_("problem with option string, check the dumptype definition.\n"));
1409 excludefree = exclude2;
1413 if(dp->exclude_file &&
1414 dp->exclude_file->nb_element == 1) {
1415 exclude1 = " exclude-file=";
1417 quote_string(dp->exclude_file->first->name);
1418 excludefree = exclude2;
1420 else if(dp->exclude_list &&
1421 dp->exclude_list->nb_element == 1) {
1422 exclude1 = " exclude-list=";
1424 quote_string(dp->exclude_list->first->name);
1425 excludefree = exclude2;
1427 if(dp->include_file &&
1428 dp->include_file->nb_element == 1) {
1429 include1 = " include-file=";
1431 quote_string(dp->include_file->first->name);
1432 includefree = include2;
1434 else if(dp->include_list &&
1435 dp->include_list->nb_element == 1) {
1436 include1 = " include-list=";
1438 quote_string(dp->include_list->first->name);
1439 includefree = include2;
/* fall back to a CLIENT estimate when the client lacks fe_calcsize_estimate */
1443 if(dp->estimate == ES_CALCSIZE &&
1444 !am_has_feature(hostp->features, fe_calcsize_estimate)) {
1445 log_add(L_WARNING,_("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1446 hostp->hostname, qname);
1447 dp->estimate = ES_CLIENT;
1449 if(dp->estimate == ES_CLIENT)
1452 calcsize = "CALCSIZE ";
1454 if(strcmp(dp->program,"DUMP") == 0 ||
1455 strcmp(dp->program,"GNUTAR") == 0) {
1458 backup_api = "BACKUP ";
1460 l = vstralloc(calcsize,
1464 " ", dp->device ? qdevice : "",
1466 " ", est(dp)->dumpdate[i],
1468 " ", exclude1, exclude2,
1469 ((includefree != NULL) ? " " : ""),
1476 amfree(includefree);
1477 amfree(excludefree);
/* the disk is now part of the outgoing request: mark it DISK_ACTIVE and take it off startq */
1485 est(dp)->state = DISK_ACTIVE;
1486 remove_disk(&startq, dp);
1488 else if (dp->estimate == ES_SERVER) {
/* server-side estimate: sizes come from the info record read by get_info(), not from the client */
1491 get_info(dp->host->hostname, dp->name, &info);
1492 for(i = 0; i < MAX_LEVELS; i++) {
1494 int lev = est(dp)->level[i];
1496 if(lev == -1) break;
1497 if(lev == 0) { /* use latest level 0, should do extrapolation */
1498 off_t est_size = (off_t)0;
1501 for(j=NB_HISTORY-2;j>=0;j--) {
1502 if(info.history[j].level == 0) {
1503 if(info.history[j].size < (off_t)0) continue;
1504 est_size = info.history[j].size;
1509 est(dp)->est_size[i] = est_size;
1511 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1512 est(dp)->est_size[i] = info.inf[lev].size;
1515 est(dp)->est_size[i] = (off_t)1000000;
1518 else if(lev == est(dp)->last_level) {
1519 /* means of all X day at the same level */
1522 off_t est_size_day[NB_DAY];
1523 int nb_est_day[NB_DAY];
1524 for(j=0;j<NB_DAY;j++) {
1525 est_size_day[j]=(off_t)0;
1529 for(j=NB_HISTORY-2;j>=0;j--) {
1530 if(info.history[j].level <= 0) continue;
1531 if(info.history[j].size < (off_t)0) continue;
1532 if(info.history[j].level==info.history[j+1].level) {
1533 if(nb_day <NB_DAY-1) nb_day++;
1534 est_size_day[nb_day] += info.history[j].size;
1535 nb_est_day[nb_day]++;
1541 nb_day = info.consecutive_runs + 1;
1542 if(nb_day > NB_DAY-1) nb_day = NB_DAY-1;
/* step back to the nearest day slot that actually has samples */
1544 while(nb_day > 0 && nb_est_day[nb_day] == 0) nb_day--;
1546 if(nb_est_day[nb_day] > 0) {
1547 est(dp)->est_size[i] = est_size_day[nb_day] /
1548 (off_t)nb_est_day[nb_day];
1550 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1551 est(dp)->est_size[i] = info.inf[lev].size;
1554 est(dp)->est_size[i] = (off_t)10000;
1557 else if(lev == est(dp)->last_level + 1) {
1558 /* means of all first day at a new level */
1559 off_t est_size = (off_t)0;
1562 for(j=NB_HISTORY-2;j>=0;j--) {
1563 if(info.history[j].level <= 0) continue;
1564 if(info.history[j].size < (off_t)0) continue;
1565 if(info.history[j].level == info.history[j+1].level + 1 ) {
1566 est_size += info.history[j].size;
1571 est(dp)->est_size[i] = est_size / (off_t)nb_est;
1573 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1574 est(dp)->est_size[i] = info.inf[lev].size;
1577 est(dp)->est_size[i] = (off_t)100000;
1581 g_fprintf(stderr,_("%s time %s: got result for host %s disk %s:"),
1582 get_pname(), walltime_str(curclock()),
1583 dp->host->hostname, qname);
1584 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1585 est(dp)->level[0], (long long)est(dp)->est_size[0],
1586 est(dp)->level[1], (long long)est(dp)->est_size[1],
1587 est(dp)->level[2], (long long)est(dp)->est_size[2]);
/* the server-side estimate is complete: move the disk from startq to estq */
1588 est(dp)->state = DISK_DONE;
1589 remove_disk(&startq, dp);
1590 enqueue_disk(&estq, dp);
1596 if(estimates == 0) {
1598 hostp->up = HOST_DONE;
/* a negative conf_etimeout is negated and used as is; the other assignment scales it by the estimate count */
1602 if (conf_etimeout < 0) {
1603 timeout = - conf_etimeout;
1605 timeout = estimates * conf_etimeout;
1607 } else { /* noop service */
1608 req = vstralloc("SERVICE ", "noop", "\n",
1610 "features=", our_feature_string, ";",
1614 * We use ctimeout for the "noop" request because it should be
1615 * very fast and etimeout has other side effects.
1617 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
/* the security driver is the one named by the first disk in the host list */
1620 secdrv = security_getdriver(hostp->disks->security_driver);
1621 if (secdrv == NULL) {
1622 hostp->up = HOST_DONE;
1624 _("Could not find security driver '%s' for host '%s'"),
1625 hostp->disks->security_driver, hostp->hostname);
1629 hostp->up = HOST_ACTIVE;
/* every DISK_ACTIVE disk is queued on waitq; handle_result() takes it off again */
1631 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1635 if(est(dp)->state == DISK_ACTIVE) {
1636 est(dp)->errstr = NULL;
1637 enqueue_disk(&waitq, dp);
1641 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1642 req, timeout, handle_result, hostp);
/*
 * lookup_hostdisk: walk the disk list of host hp (linked through hostnext)
 * and return the disk whose name compares equal to str with strcmp().
 */
1647 static disk_t *lookup_hostdisk(
1648 /*@keep@*/ am_host_t *hp,
1653 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1654 if(strcmp(str, dp->name) == 0) return dp;
/*
 * handle_result: reply callback for the request sent by getsize(), which
 * passes it to protocol_sendreq() together with the am_host_t pointer.
 *
 * The reply is examined piece by piece: a NAK carries a remote error
 * text, an OPTIONS line may carry the feature list of the client, and
 * the remaining lines carry a disk name, a level and either a SIZE or an
 * ERROR.  Afterwards every disk of the host that was waiting on this
 * reply is moved to pestq (partial reply), estq (usable estimate) or
 * failq (no usable estimate).
 */
1660 static void handle_result(
1663 security_handle_t *sech)
1669 char *msg, msg_undo;
1670 char *remoterr, *errbuf = NULL;
/* datap is the am_host_t that getsize() handed to protocol_sendreq() */
1681 hostp = (am_host_t *)datap;
1682 hostp->up = HOST_READY;
1685 errbuf = vstrallocf(_("Request to %s failed: %s"),
1686 hostp->hostname, security_geterror(sech));
/* NAK: the remote error text is what follows the ERROR keyword */
1689 if (pkt->type == P_NAK) {
1691 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1694 goto NAK_parse_failed;
1696 skip_whitespace(s, ch);
1697 if(ch == '\0') goto NAK_parse_failed;
1699 if((s = strchr(remoterr, '\n')) != NULL) {
1700 if(s == remoterr) goto NAK_parse_failed;
/* a NAK that only says the noop service is unknown does not set errbuf */
1703 if (strcmp(remoterr, "unknown service: noop") != 0
1704 && strcmp(remoterr, "noop: invalid service") != 0) {
1705 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
/* OPTIONS line: replace the feature set of the host with the one after features= */
1716 if(strncmp_const(line, "OPTIONS ") == 0) {
1717 t = strstr(line, "features=");
1718 if(t != NULL && (isspace((int)t[-1]) || t[-1] == ';')) {
1719 t += SIZEOF("features=")-1;
1720 am_release_feature_set(hostp->features);
1721 if((hostp->features = am_string_to_feature(t)) == NULL) {
1722 errbuf = vstrallocf(hostp->hostname,
1723 _(": bad features value: %s\n"), line);
1727 skip_quoted_line(s, ch);
1732 if(strncmp_const_skip(t, "ERROR ", t, tch) == 0) {
1734 skip_whitespace(t, tch);
1740 * If the "error" is that the "noop" service is unknown, it
1741 * just means the client is "old" (does not support the servie).
1742 * We can ignore this.
1744 if(hostp->features == NULL
1745 && pkt->type == P_NAK
1746 && (strcmp(t - 1, "unknown service: noop") == 0
1747 || strcmp(t - 1, "noop: invalid service") == 0)) {
1748 skip_quoted_line(s, ch);
1751 errbuf = vstralloc(hostp->hostname,
1752 (pkt->type == P_NAK) ? "NAK " : "",
/* per-disk result: a quoted disk name, then the level, then SIZE or ERROR */
1761 skip_quoted_string(t, tch);
1763 disk = unquote_string(msg);
1765 skip_whitespace(t, tch);
1767 if (sscanf(t - 1, "%d", &level) != 1) {
1771 skip_integer(t, tch);
1772 skip_whitespace(t, tch);
/* NOTE(review): the lookup below is issued twice; the second call looks redundant */
1774 dp = lookup_hostdisk(hostp, disk);
1775 dp = lookup_hostdisk(hostp, disk);
1777 log_add(L_ERROR, _("%s: invalid reply from sendsize: `%s'\n"),
1778 hostp->hostname, line);
1783 if (strncmp_const(t-1,"SIZE ") == 0) {
1784 if (sscanf(t - 1, "SIZE %lld", &size_) != 1) {
1787 size = (off_t)size_;
1788 } else if (strncmp_const(t-1,"ERROR ") == 0) {
1789 skip_non_whitespace(t, tch);
1790 skip_whitespace(t, tch);
1792 skip_quoted_string(t,tch);
1795 if (pkt->type == P_REP) {
1796 est(dp)->errstr = unquote_string(msg);
/* store the size in the slot whose requested level matches the reported level */
1805 if (size > (off_t)-1) {
1806 for(i = 0; i < MAX_LEVELS; i++) {
1807 if(est(dp)->level[i] == level) {
1808 est(dp)->est_size[i] = size;
1812 if(i == MAX_LEVELS) {
1813 goto bad_msg; /* this est wasn't requested */
1815 est(dp)->got_estimate++;
1819 skip_quoted_line(s, ch);
1822 if(hostp->up == HOST_READY && hostp->features == NULL) {
1824 * The client does not support the features list, so give it an
1827 dbprintf(_("no feature set from host %s\n"), hostp->hostname);
1828 hostp->features = am_set_default_feature_set();
1831 security_close_connection(sech, hostp->hostname);
1833 /* XXX what about disks that only got some estimates... do we care? */
1834 /* XXX amanda 2.1 treated that case as a bad msg */
/* settle every selected disk that is DISK_ACTIVE (on waitq) or DISK_PARTIALY_DONE (on pestq) */
1836 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1837 if(dp->todo == 0) continue;
1838 if(est(dp)->state != DISK_ACTIVE &&
1839 est(dp)->state != DISK_PARTIALY_DONE) continue;
1841 if(est(dp)->state == DISK_ACTIVE) {
1842 remove_disk(&waitq, dp);
1844 else if(est(dp)->state == DISK_PARTIALY_DONE) {
1845 remove_disk(&pestq, dp);
/* a final reply (P_REP) completes the disk; a partial reply (P_PREP) leaves it partially done */
1848 if(pkt->type == P_REP) {
1849 est(dp)->state = DISK_DONE;
1851 else if(pkt->type == P_PREP) {
1852 est(dp)->state = DISK_PARTIALY_DONE;
1855 if(est(dp)->level[0] == -1) continue; /* ignore this disk */
1858 qname = quote_string(dp->name);
1859 if(pkt->type == P_PREP) {
1860 g_fprintf(stderr,_("%s: time %s: got partial result for host %s disk %s:"),
1861 get_pname(), walltime_str(curclock()),
1862 dp->host->hostname, qname);
1863 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1864 est(dp)->level[0], (long long)est(dp)->est_size[0],
1865 est(dp)->level[1], (long long)est(dp)->est_size[1],
1866 est(dp)->level[2], (long long)est(dp)->est_size[2]);
1867 enqueue_disk(&pestq, dp);
1869 else if(pkt->type == P_REP) {
1870 g_fprintf(stderr,_("%s: time %s: got result for host %s disk %s:"),
1871 get_pname(), walltime_str(curclock()),
1872 dp->host->hostname, qname);
1873 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1874 est(dp)->level[0], (long long)est(dp)->est_size[0],
1875 est(dp)->level[1], (long long)est(dp)->est_size[1],
1876 est(dp)->level[2], (long long)est(dp)->est_size[2]);
/* at least one level has a positive size: levels with a negative size are cleared and the disk goes to estq */
1877 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1878 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1879 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1881 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1883 _("disk %s:%s, estimate of level %d failed."),
1884 dp->host->hostname, qname, est(dp)->level[2]);
1885 est(dp)->level[2] = -1;
1887 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1889 _("disk %s:%s, estimate of level %d failed."),
1890 dp->host->hostname, qname,
1892 est(dp)->level[1] = -1;
1894 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1896 _("disk %s:%s, estimate of level %d failed."),
1897 dp->host->hostname, qname, est(dp)->level[0]);
1898 est(dp)->level[0] = -1;
1900 enqueue_disk(&estq, dp);
/* no level produced a positive size: the disk goes to failq with an error string */
1903 enqueue_disk(&failq, dp);
1904 if(est(dp)->got_estimate) {
1905 est(dp)->errstr = vstrallocf("disk %s, all estimate failed",
1910 _("error result for host %s disk %s: missing estimate\n"),
1911 dp->host->hostname, qname);
1912 if (est(dp)->errstr == NULL) {
1913 est(dp)->errstr = vstrallocf(_("missing result for %s in %s response"),
1914 qname, dp->host->hostname);
1922 /* try to clean up any defunct processes, since Amanda doesn't wait() for
1924 while(waitpid(-1, NULL, WNOHANG)> 0);
1929 errbuf = vstrallocf(_("%s NAK: [NAK parse failed]"), hostp->hostname);
1930 g_fprintf(stderr, _("got strange nak from %s:\n----\n%s----\n\n"),
1931 hostp->hostname, pkt->body);
1935 g_fprintf(stderr,_("got a bad message, stopped at:\n"));
1937 g_fprintf(stderr,_("----\n%s----\n\n"), line);
1938 errbuf = stralloc2(_("badly formatted response from "), hostp->hostname);
1943 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1945 if(est(dp)->state == DISK_ACTIVE) {
1946 qname = quote_string(dp->name);
1947 est(dp)->state = DISK_DONE;
1948 remove_disk(&waitq, dp);
1949 enqueue_disk(&failq, dp);
1952 est(dp)->errstr = stralloc(errbuf);
1953 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
1954 dp->host->hostname, qname, errbuf);
1962 * If there were no disks involved, make sure the error gets
1965 log_add(L_ERROR, "%s", errbuf);
1967 hostp->up = HOST_DONE;
1969 /* try to clean up any defunct processes, since Amanda doesn't wait() for
1971 while(waitpid(-1, NULL, WNOHANG)> 0);
1978 * ========================================================================
1983 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
1984 static int pick_inclevel(disk_t *dp);
/*
 * analyze_estimate: decide which dump to schedule for the disk dp.
 *
 * Picks the dump level (level 0 when next_level0 <= 0 or when the last
 * level was 0 with FORCE_NO_BUMP set, otherwise an incremental level from
 * pick_inclevel()), fills in the dump_* and degraded-mode degr_* fields
 * of ep, inserts the disk into schedq ordered by schedule_order, and
 * updates the running totals total_size, total_lev0 and balanced_size.
 * Progress is traced on stderr.
 */
1986 static void analyze_estimate(
1992 char *qname = quote_string(dp->name);
1996 g_fprintf(stderr, _("pondering %s:%s... "),
1997 dp->host->hostname, qname);
1998 g_fprintf(stderr, _("next_level0 %d last_level %d "),
1999 ep->next_level0, ep->last_level);
2001 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
/* degraded-mode fields start out unset (-1) */
2005 ep->degr_level = -1;
2006 ep->degr_nsize = (off_t)-1;
2007 ep->degr_csize = (off_t)-1;
2009 if(ep->next_level0 <= 0 || (have_info && ep->last_level == 0
2010 && (info.command & FORCE_NO_BUMP))) {
2011 if(ep->next_level0 <= 0) {
2012 g_fprintf(stderr,_("(due for level 0) "));
2015 ep->dump_nsize = est_size(dp, 0);
2016 ep->dump_csize = est_tape_size(dp, 0);
2017 if(ep->dump_csize <= (off_t)0) {
2019 _("(no estimate for level 0, picking an incr level)\n"));
2020 ep->dump_level = pick_inclevel(dp);
2021 ep->dump_nsize = est_size(dp, ep->dump_level);
2022 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* still no size at the picked level: try the next level up */
2024 if(ep->dump_nsize == (off_t)-1) {
2025 ep->dump_level = ep->dump_level + 1;
2026 ep->dump_nsize = est_size(dp, ep->dump_level);
2027 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2031 total_lev0 += (double) ep->dump_csize;
2032 if(ep->last_level == -1 || dp->skip_incr) {
2033 g_fprintf(stderr,_("(%s disk, can't switch to degraded mode)\n"),
2034 dp->skip_incr? "skip-incr":_("new"));
2035 ep->degr_level = -1;
2036 ep->degr_nsize = (off_t)-1;
2037 ep->degr_csize = (off_t)-1;
2040 /* fill in degraded mode info */
2041 g_fprintf(stderr,_("(picking inclevel for degraded mode)"));
2042 ep->degr_level = pick_inclevel(dp);
2043 ep->degr_nsize = est_size(dp, ep->degr_level);
2044 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2045 if(ep->degr_csize == (off_t)-1) {
2046 ep->degr_level = ep->degr_level + 1;
2047 ep->degr_nsize = est_size(dp, ep->degr_level);
2048 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2050 if(ep->degr_csize == (off_t)-1) {
2051 g_fprintf(stderr,_("(no inc estimate)"));
2052 ep->degr_level = -1;
2054 g_fprintf(stderr,"\n");
2059 g_fprintf(stderr,_("(not due for a full dump, picking an incr level)\n"));
2060 /* XXX - if this returns -1 maybe we should force a total? */
2061 ep->dump_level = pick_inclevel(dp);
2062 ep->dump_nsize = est_size(dp, ep->dump_level);
2063 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* fallbacks when the picked level has no size: last_level first, then last_level + 1 */
2065 if(ep->dump_csize == (off_t)-1) {
2066 ep->dump_level = ep->last_level;
2067 ep->dump_nsize = est_size(dp, ep->dump_level);
2068 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2070 if(ep->dump_csize == (off_t)-1) {
2071 ep->dump_level = ep->last_level + 1;
2072 ep->dump_nsize = est_size(dp, ep->dump_level);
2073 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2075 if(ep->dump_csize == (off_t)-1) {
2077 ep->dump_nsize = est_size(dp, ep->dump_level);
2078 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2082 g_fprintf(stderr,_(" curr level %d nsize %lld csize %lld "),
2083 ep->dump_level, (long long)ep->dump_nsize,
2084 (long long)ep->dump_csize);
2086 insert_disk(&schedq, dp, schedule_order);
/* the tape cost of a dump is its compressed size plus one block and one tape mark */
2088 total_size += (off_t)tt_blocksize_kb + ep->dump_csize + tape_mark;
2090 /* update the balanced size */
2091 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2092 dp->strategy == DS_INCRONLY)) {
2095 lev0size = est_tape_size(dp, 0);
/* without a level 0 estimate, the recorded size of the last level 0 is used instead */
2096 if(lev0size == (off_t)-1) lev0size = ep->last_lev0size;
2098 balanced_size += (double)(lev0size / (off_t)runs_per_cycle);
2101 g_fprintf(stderr,_("total size %lld total_lev0 %1.0lf balanced-lev0size %1.0lf\n"),
2102 (long long)total_size, total_lev0, balanced_size);
/*
 * handle_failed: report a disk whose estimate failed.
 * The message is est(dp)->errstr, or a placeholder text when none was
 * recorded; it is wrapped in square brackets, quoted, and written both
 * to stderr as a FAILED line and to the log with log_add(L_FAIL).
 */
2106 static void handle_failed(
2109 char *errstr, *errstr1, *qerrstr;
2110 char *qname = quote_string(dp->name);
2112 errstr = est(dp)->errstr? est(dp)->errstr : _("hmm, no error indicator!");
2113 errstr1 = vstralloc("[",errstr,"]", NULL);
2114 qerrstr = quote_string(errstr1);
2117 g_fprintf(stderr, _("%s: FAILED %s %s %s 0 %s\n"),
2118 get_pname(), dp->host->hostname, qname, planner_timestamp, qerrstr);
2120 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2121 planner_timestamp, qerrstr);
2125 /* XXX - memory leak with *dp */
2130 * insert-sort by decreasing priority, then
2131 * by decreasing size within priority levels.
/*
 * schedule_order: comparison callback passed to insert_disk() for schedq.
 * Returns the dump_priority difference (b minus a) when it is non-zero;
 * otherwise -1 or 1 according to the sign of the dump_csize difference
 * (b minus a).
 */
2134 static int schedule_order(
2141 diff = est(b)->dump_priority - est(a)->dump_priority;
2142 if(diff != 0) return diff;
/* the size difference is an off_t, so it is reduced to its sign rather than returned as an int */
2144 ldiff = est(b)->dump_csize - est(a)->dump_csize;
2145 if(ldiff < (off_t)0) return -1; /* XXX - there has to be a better way to do this */
2146 if(ldiff > (off_t)0) return 1;
/*
 * pick_inclevel: choose the incremental level for the disk dp.
 *
 * The starting point is the previous level, est(dp)->last_level.  A
 * threshold is obtained from bump_thresh(); the next level up is chosen
 * (and logged as bumped) only when the size at the current level minus
 * the size at the next level is at least that threshold.  Decisions are
 * traced on stderr.
 */
2151 static int pick_inclevel(
2154 int base_level, bump_level;
2155 off_t base_size, bump_size;
2159 base_level = est(dp)->last_level;
2161 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2162 if(base_level == 0) {
2163 g_fprintf(stderr,_(" picklev: last night 0, so tonight level 1\n"));
2167 /* if no-full option set, always do level 1 */
2168 if(dp->strategy == DS_NOFULL) {
2169 g_fprintf(stderr,_(" picklev: no-full set, so always level 1\n"));
2173 base_size = est_size(dp, base_level);
2175 /* if we didn't get an estimate, we can't do an inc */
2176 if(base_size == (off_t)-1) {
/* exception: when only the next level has an estimate, that level is returned */
2177 base_size = est_size(dp, base_level+1);
2178 if(base_size > (off_t)0) /* FORCE_BUMP */
2179 return base_level+1;
2180 g_fprintf(stderr,_(" picklev: no estimate for level %d, so no incs\n"), base_level);
2184 thresh = bump_thresh(base_level, est_size(dp, 0), dp->bumppercent, dp->bumpsize, dp->bumpmult);
2187 _(" pick: size %lld level %d days %d (thresh %lldK, %d days)\n"),
2188 (long long)base_size, base_level, est(dp)->level_days,
2189 (long long)thresh, dp->bumpdays);
/* NOTE(review): the start and the body of this test are not visible here; presumably it keeps base_level */
2192 || est(dp)->level_days < dp->bumpdays
2193 || base_size <= thresh)
2196 bump_level = base_level + 1;
2197 bump_size = est_size(dp, bump_level);
/* no estimate for the next level: stay at the current one */
2199 if(bump_size == (off_t)-1) return base_level;
2201 g_fprintf(stderr, _(" pick: next size %lld... "),
2202 (long long)bump_size);
/* bumping must save at least thresh, otherwise it is not done */
2204 if(base_size - bump_size < thresh) {
2205 g_fprintf(stderr, _("not bumped\n"));
2209 qname = quote_string(dp->name);
2210 g_fprintf(stderr, _("BUMPED\n"));
2211 log_add(L_INFO, _("Incremental of %s:%s bumped to level %d."),
2212 dp->host->hostname, qname, bump_level);
2222 ** ========================================================================
2225 ** We have two strategies here:
2229 ** If we are trying to fit too much on the tape something has to go. We
2230 ** try to delay totals until tomorrow by converting them into incrementals
2231 ** and, if that is not effective enough, dropping incrementals altogether.
2232 ** While we are searching for the guilty dump (the one that is really
2233 ** causing the schedule to be oversize) we have probably trampled on a lot of
2234 ** innocent dumps, so we maintain a "before image" list and use this to
2235 ** put back what we can.
2237 ** 2. Promote dumps.
2239 ** We try to keep the amount of tape used by total dumps the same each night.
2240 ** If there is some spare tape in this run we have a look to see if any of
2241 ** tonight's incrementals could be promoted to totals and leave us with a
2242 ** more balanced cycle.
2245 static void delay_one_dump(disk_t *dp, int delete, ...);
2246 static int promote_highest_priority_incremental(void);
2247 static int promote_hills(void);
2249 /* delay any dumps that will not fit */
/*
 * Works through schedq in the numbered passes described in the body
 * (oversize dumps, full dumps, incrementals, reinstatement, messages).
 * Every delayed or dropped dump is handed to delay_one_dump(), and the
 * entries of the biq list are walked in pass 4 to put back those dumps
 * that still fit within tape_length.
 */
2250 static void delay_dumps(void)
2257 off_t new_total; /* New total_size */
2258 char est_kb[20]; /* Text formatted dump size */
2259 int nb_forced_level_0;
2265 biq.head = biq.tail = NULL;
2268 ** 1. Delay dumps that are way oversize.
2270 ** Dumps larger that the size of the tapes we are using are just plain
2271 ** not going to fit no matter how many other dumps we drop. Delay
2272 ** oversize totals until tomorrow (by which time my owner will have
2273 ** resolved the problem!) and drop incrementals altogether. Naturally
2274 ** a large total might be delayed into a large incremental so these
2275 ** need to be checked for separately.
2278 for(dp = schedq.head; dp != NULL; dp = ndp) {
/* a disk with a split size may use all conf_runtapes tapes, any other disk only one */
2279 int avail_tapes = 1;
2280 if (dp->tape_splitsize > (off_t)0)
2281 avail_tapes = conf_runtapes;
2283 ndp = dp->next; /* remove_disk zaps this */
2285 full_size = est_tape_size(dp, 0);
2286 if (full_size > tapetype_get_length(tape) * (off_t)avail_tapes) {
2287 char *qname = quote_string(dp->name);
2288 if (conf_runtapes > 1 && dp->tape_splitsize == (off_t)0) {
2289 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"
2290 ", you could define a splitsize"),
2291 dp->host->hostname, qname,
2292 (long long)full_size);
2294 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"),
2295 dp->host->hostname, qname,
2296 (long long)full_size);
/* a dump without a size, or one that fits in the available tape space, is not oversize */
2301 if (est(dp)->dump_csize == (off_t)-1 ||
2302 est(dp)->dump_csize <= tapetype_get_length(tape) * (off_t)avail_tapes) {
2306 /* Format dumpsize for messages */
2307 g_snprintf(est_kb, 20, "%lld KB,",
2308 (long long)est(dp)->dump_csize);
2310 if(est(dp)->dump_level == 0) {
2313 message = _("but cannot incremental dump skip-incr disk");
2315 else if(est(dp)->last_level < 0) {
2317 message = _("but cannot incremental dump new disk");
2319 else if(est(dp)->degr_level < 0) {
2321 message = _("but no incremental estimate");
2323 else if (est(dp)->degr_csize > tapetype_get_length(tape)) {
2325 message = _("incremental dump also larger than tape");
2329 message = _("full dump delayed");
2334 message = _("skipping incremental");
2336 delay_one_dump(dp, delete, _("dump larger than available tape space,"),
2337 est_kb, message, NULL);
2341 ** 2. Delay total dumps.
2343 ** Delay total dumps until tomorrow (or the day after!). We start with
2344 ** the lowest priority (most dispensable) and work forwards. We take
2345 ** care not to delay *all* the dumps since this could lead to a stale
2346 ** mate [for any one disk there are only three ways tomorrows dump will
2347 ** be smaller than todays: 1. we do a level 0 today so tomorows dump
2348 ** will be a level 1; 2. the disk gets more data so that it is bumped
2349 ** tomorrow (this can be a slow process); and, 3. the disk looses some
2350 ** data (when does that ever happen?)].
2353 nb_forced_level_0 = 0;
/* NOTE(review): this loop stops once preserve is set; the assignment is not visible here, presumably the first level 0 dump in schedq */
2355 for(dp = schedq.head; dp != NULL && preserve == NULL; dp = dp->next)
2356 if(est(dp)->dump_level == 0)
2359 /* 2.a. Do not delay forced full */
2360 for(dp = schedq.tail;
2361 dp != NULL && total_size > tape_length;
2365 if(est(dp)->dump_level != 0) continue;
/* dumps flagged FORCE_FULL are only counted in this pass; pass 2.b deals with them */
2367 get_info(dp->host->hostname, dp->name, &info);
2368 if(info.command & FORCE_FULL) {
2369 nb_forced_level_0 += 1;
2374 if(dp != preserve) {
2376 /* Format dumpsize for messages */
2377 g_snprintf(est_kb, 20, "%lld KB,",
2378 (long long)est(dp)->dump_csize);
2382 message = _("but cannot incremental dump skip-incr disk");
2384 else if(est(dp)->last_level < 0) {
2386 message = _("but cannot incremental dump new disk");
2388 else if(est(dp)->degr_level < 0) {
2390 message = _("but no incremental estimate");
2394 message = _("full dump delayed");
2396 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2401 /* 2.b. Delay forced full if needed */
2402 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2403 for(dp = schedq.tail;
2404 dp != NULL && total_size > tape_length;
2408 if(est(dp)->dump_level == 0 && dp != preserve) {
2410 /* Format dumpsize for messages */
2411 g_snprintf(est_kb, 20, "%lld KB,",
2412 (long long)est(dp)->dump_csize);
2416 message = _("but cannot incremental dump skip-incr disk");
2418 else if(est(dp)->last_level < 0) {
2420 message = _("but cannot incremental dump new disk");
2422 else if(est(dp)->degr_level < 0) {
2424 message = _("but no incremental estimate");
2428 message = _("full dump delayed");
2430 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2437 ** 3. Delay incremental dumps.
2439 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2440 ** at making things fit. Again, we start with the lowest priority (most
2441 ** dispensable) and work forwards.
2444 for(dp = schedq.tail;
2445 dp != NULL && total_size > tape_length;
2449 if(est(dp)->dump_level != 0) {
2451 /* Format dumpsize for messages */
2452 g_snprintf(est_kb, 20, "%lld KB,",
2453 (long long)est(dp)->dump_csize);
2455 delay_one_dump(dp, 1,
2456 _("dumps way too big,"),
2458 _("must skip incremental dumps"),
2464 ** 4. Reinstate delayed dumps.
2466 ** We might not have needed to stomp on all of the dumps we have just
2467 ** delayed above. Try to reinstate them all starting with the last one
2468 ** and working forwards. It is unlikely that the last one will fit back
2469 ** in but why complicate the code?
2472 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2473 int avail_tapes = 1;
2476 if(dp->tape_splitsize > (off_t)0)
2477 avail_tapes = conf_runtapes;
/* two ways to compute the new total: add the full tape cost back, or swap the current csize for the saved one */
2480 new_total = total_size + (off_t)tt_blocksize_kb +
2481 bi->csize + (off_t)tape_mark;
2483 new_total = total_size - est(dp)->dump_csize + bi->csize;
/* reinstate only when the schedule still fits and the dump itself fits the available tapes */
2485 if((new_total <= tape_length) &&
2486 (bi->csize < (tapetype_get_length(tape) * (off_t)avail_tapes))) {
2488 total_size = new_total;
2490 if(bi->level == 0) {
2491 total_lev0 += (double) bi->csize;
2493 insert_disk(&schedq, dp, schedule_order);
2496 est(dp)->dump_level = bi->level;
2497 est(dp)->dump_nsize = bi->nsize;
2498 est(dp)->dump_csize = bi->csize;
/* unlink the reinstated entry from the doubly linked biq list */
2502 if(bi->next == NULL)
2503 biq.tail = bi->prev;
2505 (bi->next)->prev = bi->prev;
2506 if(bi->prev == NULL)
2507 biq.head = bi->next;
2509 (bi->prev)->next = bi->next;
2517 ** 5. Output messages about what we have done.
2519 ** We can't output messages while we are delaying dumps because we might
2520 ** reinstate them later. We remember all the messages and output them
2524 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2527 g_fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2528 log_add(L_FAIL, "%s", bi->errstr);
2532 g_fprintf(stderr, _(" delay: %s now at level %d\n"),
2533 bi->errstr, est(dp)->dump_level);
2534 log_add(L_INFO, "%s", bi->errstr);
2542 g_fprintf(stderr, _(" delay: Total size now %lld.\n"),
2543 (long long)total_size);
2550 * Remove a dump or modify it from full to incremental.
2551 * Keep track of it on the bi q in case we can add it back later.
/*
 * The arguments after delete are message fragments, read with
 * arglist_val() until a NULL is reached; they are joined into one
 * bracketed error string that is quoted and appended to bi->errstr.
 * The level and sizes of the dump before the change are saved on the
 * bi entry, which is linked at the tail of biq.
 */
2554 static void delay_one_dump,
2560 char level_str[NUM_STR_SIZE];
2563 char *qname = quote_string(dp->name);
2564 char *errstr, *qerrstr;
2566 arglist_start(argp, delete);
/* take the tape cost of this dump out of the running totals */
2568 total_size -= (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
2569 if(est(dp)->dump_level == 0) {
2570 total_lev0 -= (double) est(dp)->dump_csize;
/* new bi entry, appended at the tail of biq */
2573 bi = alloc(SIZEOF(bi_t));
2575 bi->prev = biq.tail;
2576 if(biq.tail == NULL)
2579 biq.tail->next = bi;
2582 bi->deleted = delete;
2584 bi->level = est(dp)->dump_level;
2585 bi->nsize = est(dp)->dump_nsize;
2586 bi->csize = est(dp)->dump_csize;
2588 g_snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_level);
2589 bi->errstr = vstralloc(dp->host->hostname,
2591 " ", planner_timestamp ? planner_timestamp : "?",
2596 while ((next = arglist_val(argp, char *)) != NULL) {
2597 vstrextend(&errstr, sep, next, NULL);
2600 strappend(errstr, "]");
2601 qerrstr = quote_string(errstr);
2602 vstrextend(&bi->errstr, " ", qerrstr, NULL);
/* the dump is either removed from schedq, or switched to its degraded-mode level and counted in total_size again (the branch condition is not visible here) */
2608 remove_disk(&schedq, dp);
2610 est(dp)->dump_level = est(dp)->degr_level;
2611 est(dp)->dump_nsize = est(dp)->degr_nsize;
2612 est(dp)->dump_csize = est(dp)->degr_csize;
2613 total_size += (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
/*
 * promote_highest_priority_incremental: pick one disk in schedq and turn
 * its scheduled dump into a level 0.
 *
 * Each disk first gets est(dp)->promote set to -1000 and, if it passes
 * the checks below, a score computed from how many dumps are level 0
 * today and how many are due on the same day.  For the promoted disk the
 * former dump level and sizes are saved in the degr_* fields, dump_level
 * becomes 0, next_level0 becomes 0, and total_size and total_lev0 are
 * updated.  The promotion is traced on stderr and logged.
 */
2620 static int promote_highest_priority_incremental(void)
2622 disk_t *dp, *dp1, *dp_promote;
2623 off_t new_size, new_total, new_lev0;
2625 int nb_today, nb_same_day, nb_today2;
2626 int nb_disk_today, nb_disk_same_day;
2630 * return 1 if did so; must update total_size correctly; must not
2631 * cause total_size to exceed tape_length
2635 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2637 est(dp)->promote = -1000;
/* NOTE(review): the bodies of the three tests below are not visible here; presumably each one skips the disk */
2639 if(est_size(dp,0) <= (off_t)0)
2642 if(est(dp)->next_level0 <= 0)
2645 if(est(dp)->next_level0 > dp->maxpromoteday)
/* totals as they would be if this disk were dumped at level 0 */
2648 new_size = est_tape_size(dp, 0);
2649 new_total = total_size - est(dp)->dump_csize + new_size;
2650 new_lev0 = (off_t)total_lev0 + new_size;
2655 nb_disk_same_day = 0;
/* second pass over schedq: level 0 dumps today versus dumps due on the same day as dp, with a separate comparison for disks on the same host */
2656 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2657 if(est(dp1)->dump_level == 0)
2659 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2661 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2662 if(est(dp1)->dump_level == 0)
2664 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2669 /* do not promote if overflow tape */
2670 if(new_total > tape_length)
2673 /* do not promote if overflow balanced size and something today */
2674 /* promote if nothing today */
2675 if((new_lev0 > (off_t)(balanced_size + balance_threshold)) &&
2676 (nb_disk_today > 0))
2679 /* do not promote if only one disk due that day and nothing today */
2680 if(nb_disk_same_day == 1 && nb_disk_today == 0)
2683 nb_today2 = nb_today*nb_today;
2684 if(nb_today == 0 && nb_same_day > 1)
/* score: both formulas add conf_dumpcycle minus next_level0, so a disk due sooner scores higher */
2687 if(nb_same_day >= nb_today2) {
2688 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2689 conf_dumpcycle - est(dp)->next_level0;
2692 est(dp)->promote = -nb_today2 +
2693 conf_dumpcycle - est(dp)->next_level0;
2696 qname = quote_string(dp->name);
/* the candidate with the highest score so far is compared against dp */
2697 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2699 g_fprintf(stderr," try %s:%s %d %d %d = %d\n",
2700 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2703 g_fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2704 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2712 qname = quote_string(dp->name);
2713 new_size = est_tape_size(dp, 0);
2714 new_total = total_size - est(dp)->dump_csize + new_size;
2715 new_lev0 = (off_t)total_lev0 + new_size;
2717 total_size = new_total;
2718 total_lev0 = (double)new_lev0;
2719 check_days = est(dp)->next_level0;
/* keep the former dump as the degraded-mode fallback before switching to level 0 */
2720 est(dp)->degr_level = est(dp)->dump_level;
2721 est(dp)->degr_nsize = est(dp)->dump_nsize;
2722 est(dp)->degr_csize = est(dp)->dump_csize;
2723 est(dp)->dump_level = 0;
2724 est(dp)->dump_nsize = est_size(dp, 0);
2725 est(dp)->dump_csize = new_size;
2726 est(dp)->next_level0 = 0;
2729 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2730 dp->host->hostname, qname,
2731 total_lev0, (long long)total_size);
2734 plural(_("Full dump of %s:%s promoted from %d day ahead."),
2735 _("Full dump of %s:%s promoted from %d days ahead."),
2737 dp->host->hostname, qname, check_days);
/*
 * promote_hills: builds a table, indexed by the number of days until a
 * disk's next level 0, of how much level 0 data is due on each day.  It then
 * looks for the day with the largest total that has more than one disk (the
 * "tallest hill") and tries to promote one disk from that day to a level 0
 * today, provided the new total still fits within tape_length.
 *
 * NOTE(review): this listing omits lines of the function (the body of
 * struct balance_stats, local declarations, the header of the search loop,
 * continue/return statements, freeing of sp).  The comments added here
 * describe only the visible statements.
 */
2745 static int promote_hills(void)
2748 struct balance_stats {
2760 /* If we are already doing a level 0 don't bother */
2764 /* Do the guts of an "amadmin balance" */
/* The table length is conf_dumpcycle, capped at 10000 entries. */
2765 my_dumpcycle = conf_dumpcycle;
2766 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
2768 sp = (struct balance_stats *)
2769 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
/* Reset every table entry before accumulating. */
2771 for(days = 0; days < my_dumpcycle; days++) {
2773 sp[days].size = (off_t)0;
/*
 * Accumulate per-day totals.  A disk contributes its last_lev0size to the
 * entry for its next_level0 only when that day falls inside the table and
 * the disk is neither skip_full nor using the DS_NOFULL / DS_INCRONLY
 * strategies.  The only bound checked on days is the upper one; the
 * existing comment states that next_level0 is > 0 here.
 */
2776 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2777 days = est(dp)->next_level0; /* This is > 0 by definition */
2778 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
2779 dp->strategy != DS_INCRONLY) {
2781 sp[days].size += est(dp)->last_lev0size;
2785 /* Search for a suitable big hill and cut it down */
2787 /* Find the tallest hill */
/* Only days with more than one disk are considered as hills. */
2788 hill_size = (off_t)0;
2789 for(days = 0; days < my_dumpcycle; days++) {
2790 if(sp[days].disks > 1 && sp[days].size > hill_size) {
2791 hill_size = sp[days].size;
2796 if(hill_size <= (off_t)0) break; /* no suitable hills */
2798 /* Find all the dumps in that hill and try and remove one */
/*
 * Candidate filter: the disk must be due on the hill day, must not be
 * further away than its maxpromoteday, and must not use the DS_NOFULL or
 * DS_INCRONLY strategies (one more condition of this test is not visible).
 * The guarded statement is not visible; presumably it skips the disk --
 * TODO confirm.
 */
2799 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2800 if(est(dp)->next_level0 != hill_days ||
2801 est(dp)->next_level0 > dp->maxpromoteday ||
2803 dp->strategy == DS_NOFULL ||
2804 dp->strategy == DS_INCRONLY)
/*
 * Total tape usage if this disk were dumped at level 0 instead of its
 * currently scheduled dump; checked against tape_length.
 */
2806 new_size = est_tape_size(dp, 0);
2807 new_total = total_size - est(dp)->dump_csize + new_size;
2808 if(new_total > tape_length)
2810 /* We found a disk we can promote */
2811 qname = quote_string(dp->name);
2812 total_size = new_total;
2813 total_lev0 += (double)new_size;
/*
 * The previously scheduled level and sizes are saved in the degr_* fields
 * before the dump_* fields are overwritten with the level 0 values.
 */
2814 est(dp)->degr_level = est(dp)->dump_level;
2815 est(dp)->degr_nsize = est(dp)->dump_nsize;
2816 est(dp)->degr_csize = est(dp)->dump_csize;
2817 est(dp)->dump_level = 0;
2818 est(dp)->next_level0 = 0;
2819 est(dp)->dump_nsize = est_size(dp, 0);
2820 est(dp)->dump_csize = new_size;
2823 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2824 dp->host->hostname, qname,
2825 total_lev0, (long long)total_size);
2828 plural(_("Full dump of %s:%s specially promoted from %d day ahead."),
2829 _("Full dump of %s:%s specially promoted from %d days ahead."),
2831 dp->host->hostname, qname, hill_days);
2837 /* All the disks in that hill were unsuitable. */
/*
 * Zeroing the disk count makes this day fail the "disks > 1" test the next
 * time the tallest hill is searched for.
 */
2838 sp[hill_days].disks = 0; /* Don't get tricked again */
2846 * ========================================================================
2849 * XXX - memory leak - we shouldn't just throw away *dp
2851 static void output_scheduleline(
2855 time_t dump_time = 0, degr_time = 0;
2856 double dump_kps = 0, degr_kps = 0;
2857 char *schedline = NULL, *degr_str = NULL;
2858 char dump_priority_str[NUM_STR_SIZE];
2859 char dump_level_str[NUM_STR_SIZE];
2860 char dump_nsize_str[NUM_STR_SIZE];
2861 char dump_csize_str[NUM_STR_SIZE];
2862 char dump_time_str[NUM_STR_SIZE];
2863 char dump_kps_str[NUM_STR_SIZE];
2864 char degr_level_str[NUM_STR_SIZE];
2865 char degr_nsize_str[NUM_STR_SIZE];
2866 char degr_csize_str[NUM_STR_SIZE];
2867 char degr_time_str[NUM_STR_SIZE];
2868 char degr_kps_str[NUM_STR_SIZE];
2869 char *dump_date, *degr_date;
2872 char *qname = quote_string(dp->name);
2876 if(ep->dump_csize == (off_t)-1) {
2877 /* no estimate, fail the disk */
2879 _("%s: FAILED %s %s %s %d \"[no estimate]\"\n"),
2881 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2882 log_add(L_FAIL, _("%s %s %s %d [no estimate]"),
2883 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2888 dump_date = degr_date = (char *)0;
2889 for(i = 0; i < MAX_LEVELS; i++) {
2890 if(ep->dump_level == ep->level[i])
2891 dump_date = ep->dumpdate[i];
2892 if(ep->degr_level == ep->level[i])
2893 degr_date = ep->dumpdate[i];
2896 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
2898 if(ep->dump_level == 0) {
2899 dump_kps = fix_rate(ep->fullrate);
2900 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2902 if(ep->degr_csize != (off_t)-1) {
2903 degr_kps = fix_rate(ep->incrrate);
2904 degr_time = (time_t)((double)ep->degr_csize / degr_kps);
2908 dump_kps = fix_rate(ep->incrrate);
2909 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2912 if(ep->dump_level == 0 && ep->degr_csize != (off_t)-1) {
2913 g_snprintf(degr_level_str, sizeof(degr_level_str),
2914 "%d", ep->degr_level);
2915 g_snprintf(degr_nsize_str, sizeof(degr_nsize_str),
2916 "%lld", (long long)ep->degr_nsize);
2917 g_snprintf(degr_csize_str, sizeof(degr_csize_str),
2918 "%lld", (long long)ep->degr_csize);
2919 g_snprintf(degr_time_str, sizeof(degr_time_str),
2920 "%lld", (long long)degr_time);
2921 g_snprintf(degr_kps_str, sizeof(degr_kps_str),
2923 degr_str = vstralloc(" ", degr_level_str,
2925 " ", degr_nsize_str,
2926 " ", degr_csize_str,
2931 g_snprintf(dump_priority_str, SIZEOF(dump_priority_str),
2932 "%d", ep->dump_priority);
2933 g_snprintf(dump_level_str, SIZEOF(dump_level_str),
2934 "%d", ep->dump_level);
2935 g_snprintf(dump_nsize_str, sizeof(dump_nsize_str),
2936 "%lld", (long long)ep->dump_nsize);
2937 g_snprintf(dump_csize_str, sizeof(dump_csize_str),
2938 "%lld", (long long)ep->dump_csize);
2939 g_snprintf(dump_time_str, sizeof(dump_time_str),
2940 "%lld", (long long)dump_time);
2941 g_snprintf(dump_kps_str, sizeof(dump_kps_str),
2943 features = am_feature_to_string(dp->host->features);
2944 schedline = vstralloc("DUMP ",dp->host->hostname,
2947 " ", planner_timestamp,
2948 " ", dump_priority_str,
2949 " ", dump_level_str,
2951 " ", dump_nsize_str,
2952 " ", dump_csize_str,
2955 degr_str ? degr_str : "",
2958 fputs(schedline, stdout);
2959 fputs(schedline, stderr);