2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: planner.c,v 1.206 2006/08/10 23:57:27 paddy_s Exp $
29 * backup schedule planner for the Amanda backup system.
43 #include "amfeatures.h"
44 #include "server_util.h"
46 #include "timestamp.h"
48 #define planner_debug(i,x) do { \
49 if ((i) <= debug_planner) { \
54 #define MAX_LEVELS 3 /* max number of estimate slots (level/dumpdate/est_size entries) per filesystem */
56 #define RUNS_REDZONE 5 /* a dump whose tape is overwritten within this many runs gets an "overwritten in N runs" message; should be in conf file? */
58 #define PROMOTE_THRESHOLD 0.05 /* fraction of balanced_size: if <5% unbalanced, don't promote */
59 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
61 /* configuration file stuff */
64 off_t conf_maxdumpsize;
67 int conf_runspercycle;
72 int conf_usetimestamps;
74 #define HOST_READY ((void *)0) /* must be 0 */
75 #define HOST_ACTIVE ((void *)1)
76 #define HOST_DONE ((void *)2)
78 #define DISK_READY 0 /* must be 0 */
80 #define DISK_PARTIALY_DONE 2
83 typedef struct est_s {
88 off_t dump_nsize; /* native size */
89 off_t dump_csize; /* compressed size */
90 int degr_level; /* if dump_level == 0, what would be the inc level */
91 off_t degr_nsize; /* native degraded size */
92 off_t degr_csize; /* compressed degraded size */
98 double fullrate, incrrate; /* perf_average() of the recorded full / incremental dump rates */
99 double fullcomp, incrcomp; /* perf_average() of the recorded full / incremental compression ratios, defaulting to dp->comprate[] */
101 int level[MAX_LEVELS]; /* dump level requested in each estimate slot; -1 marks the end of the estimates */
102 char *dumpdate[MAX_LEVELS]; /* per slot: copy of get_dumpdate() for that level, or NULL */
103 off_t est_size[MAX_LEVELS]; /* per slot: estimated size; starts at -2, and -1 is reported as a failed estimate */
106 #define est(dp) ((est_t *)(dp)->up)
108 /* pestq = partial estimate */
109 disklist_t startq, waitq, pestq, estq, failq, schedq;
111 double total_lev0, balanced_size, balance_threshold;
117 size_t tt_blocksize_kb;
118 int runs_per_cycle = 0;
120 char *planner_timestamp = NULL;
122 static am_feature_t *our_features = NULL;
123 static char *our_feature_string = NULL;
125 /* We keep a LIFO queue of before images for all modifications made
126 * to schedq in our attempt to make the schedule fit on the tape.
127 * Enough information is stored to reinstate a dump if it turns out
128 * that it shouldn't have been touched after all.
130 typedef struct bi_s {
133 int deleted; /* 0=modified, 1=deleted */
134 disk_t *dp; /* The disk that was changed */
135 int level; /* The original level */
136 off_t nsize; /* The original native size */
137 off_t csize; /* The original compressed size */
138 char *errstr; /* A message describing why this disk is here */
141 typedef struct bilist_s {
145 bilist_t biq; /* The BI queue itself */
148 * ========================================================================
153 static void setup_estimate(disk_t *dp);
154 static void get_estimates(void);
155 static void analyze_estimate(disk_t *dp);
156 static void handle_failed(disk_t *dp);
157 static void delay_dumps(void);
158 static int promote_highest_priority_incremental(void);
159 static int promote_hills(void);
160 static void output_scheduleline(disk_t *dp);
161 int main(int, char **);
177 times_t section_start;
181 config_overwrites_t *cfg_ovr = NULL;
182 char *cfg_opt = NULL;
185 * Configure program for internationalization:
186 * 1) Only set the message locale for now.
187 * 2) Set textdomain for all amanda related programs to "amanda"
188 * We don't want to be forced to support dozens of message catalogs.
190 setlocale(LC_MESSAGES, "C");
191 textdomain("amanda");
193 /* drop root privileges */
194 if (!set_root_privs(0)) {
195 error(_("planner must be run setuid root"));
200 set_pname("planner");
202 dbopen(DBG_SUBDIR_SERVER);
204 cfg_ovr = extract_commandline_config_overwrites(&argc, &argv);
209 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD | CONFIG_INIT_FATAL,
211 apply_config_overwrites(cfg_ovr);
215 check_running_as(RUNNING_AS_DUMPUSER);
217 dbrename(config_name, DBG_SUBDIR_SERVER);
219 /* Don't die when child closes pipe */
220 signal(SIGPIPE, SIG_IGN);
222 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
224 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
225 set_logerror(logerror);
227 section_start = curclock();
229 our_features = am_init_feature_set();
230 our_feature_string = am_feature_to_string(our_features);
232 g_fprintf(stderr, _("%s: pid %ld executable %s version %s\n"),
233 get_pname(), (long) getpid(), argv[0], version());
234 for (i = 0; version_info[i] != NULL; i++)
235 g_fprintf(stderr, _("%s: %s"), get_pname(), version_info[i]);
238 if (argc > 3 && strcmp(argv[2], "--starttime") == 0) {
239 planner_timestamp = stralloc(argv[3]);
245 * 1. Networking Setup
252 * 2. Read in Configuration Information
254 * All the Amanda configuration files are loaded before we begin.
257 g_fprintf(stderr,_("READING CONF INFO...\n"));
259 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
260 if (read_diskfile(conf_diskfile, &origq) < 0) {
261 error(_("could not load disklist \"%s\""), conf_diskfile);
264 if(origq.head == NULL) {
265 error(_("empty disklist \"%s\""), conf_diskfile);
269 errstr = match_disklist(&origq, argc-diskarg_offset,
270 argv+diskarg_offset);
272 g_fprintf(stderr,"%s",errstr);
276 for(dp = origq.head; dp != NULL; dp = dp->next) {
278 qname = quote_string(dp->name);
279 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
286 error(_("no DLE to backup"));
289 amfree(conf_diskfile);
291 conf_tapelist = config_dir_relative(getconf_str(CNF_TAPELIST));
292 if(read_tapelist(conf_tapelist)) {
293 error(_("could not load tapelist \"%s\""), conf_tapelist);
296 amfree(conf_tapelist);
298 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
299 if(open_infofile(conf_infofile)) {
300 error(_("could not open info db \"%s\""), conf_infofile);
303 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
304 log_add(L_WARNING, "problem copying infofile: %s", errstr);
307 amfree(conf_infofile);
309 conf_tapetype = getconf_str(CNF_TAPETYPE);
310 conf_maxdumpsize = getconf_am64(CNF_MAXDUMPSIZE);
311 conf_runtapes = getconf_int(CNF_RUNTAPES);
312 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
313 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
314 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
315 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
316 conf_reserve = getconf_int(CNF_RESERVE);
317 conf_autoflush = getconf_boolean(CNF_AUTOFLUSH);
318 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
321 if (planner_timestamp) { /* a timestamp was supplied with --starttime */
322 if (conf_usetimestamps == 0) {
323 planner_timestamp[8] = '\0'; /* keep only the first 8 characters; NOTE(review): assumes the --starttime value has at least 8 characters -- confirm, no length check is visible */
325 } else if(conf_usetimestamps == 0) {
326 planner_timestamp = get_datestamp_from_time(0); /* no --starttime given: generate a datestamp */
329 planner_timestamp = get_timestamp_from_time(0); /* presumably the remaining case (usetimestamps enabled): generate a full timestamp */
331 log_add(L_START, _("date %s"), planner_timestamp);
332 g_printf("DATE %s\n", planner_timestamp);
334 g_fprintf(stderr, _("%s: timestamp %s\n"),
335 get_pname(), planner_timestamp);
337 /* some initializations */
339 if(conf_runspercycle == 0) {
340 runs_per_cycle = conf_dumpcycle;
341 } else if(conf_runspercycle == -1 ) {
342 runs_per_cycle = guess_runs_from_tapelist();
344 runs_per_cycle = conf_runspercycle;
346 if (runs_per_cycle <= 0) {
351 * do some basic sanity checking
353 if(conf_tapecycle <= runs_per_cycle) {
354 log_add(L_WARNING, _("tapecycle (%d) <= runspercycle (%d)"),
355 conf_tapecycle, runs_per_cycle);
358 tape = lookup_tapetype(conf_tapetype);
359 if(conf_maxdumpsize > (off_t)0) {
360 tape_length = (off_t)conf_maxdumpsize;
363 tape_length = tapetype_get_length(tape) * (off_t)conf_runtapes;
365 tape_mark = (size_t)tapetype_get_filemark(tape);
366 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
367 tt_blocksize = tt_blocksize_kb * 1024;
369 g_fprintf(stderr, _("%s: time %s: startup took %s secs\n"),
371 walltime_str(curclock()),
372 walltime_str(timessub(curclock(), section_start)));
375 * 3. Send autoflush dumps left on the holding disks
377 * This should give us something to do while we generate the new
381 g_fprintf(stderr,_("\nSENDING FLUSHES...\n"));
385 GSList *holding_list, *holding_file;
386 char *qdisk, *qhname;
388 /* get *all* flushable files in holding */
389 holding_list = holding_get_files_for_flush(NULL);
390 for(holding_file=holding_list; holding_file != NULL;
391 holding_file = holding_file->next) {
392 holding_file_get_dumpfile((char *)holding_file->data, &file);
394 if (holding_file_size((char *)holding_file->data, 1) <= 0) {
395 log_add(L_INFO, "%s: removing file with no data.",
396 (char *)holding_file->data);
397 holding_file_unlink((char *)holding_file->data);
401 qdisk = quote_string(file.disk);
402 qhname = quote_string((char *)holding_file->data);
403 log_add(L_DISK, "%s %s", file.name, qdisk);
405 "FLUSH %s %s %s %d %s\n",
412 "FLUSH %s %s %s %d %s\n",
421 g_slist_free_full(holding_list);
424 g_fprintf(stderr, _("ENDFLUSH\n"));
425 g_fprintf(stdout, _("ENDFLUSH\n"));
429 * 4. Calculate Preliminary Dump Levels
431 * Before we can get estimates from the remote slave hosts, we make a
432 * first attempt at guessing what dump levels we will be dumping at
433 * based on the curinfo database.
436 g_fprintf(stderr,_("\nSETTING UP FOR ESTIMATES...\n"));
437 section_start = curclock();
439 startq.head = startq.tail = NULL;
440 while(!empty(origq)) {
441 disk_t *dp = dequeue_disk(&origq);
447 g_fprintf(stderr, _("%s: time %s: setting up estimates took %s secs\n"),
449 walltime_str(curclock()),
450 walltime_str(timessub(curclock(), section_start)));
454 * 5. Get Dump Size Estimates from Remote Client Hosts
456 * Each host is queried (in parallel) for dump size information on all
457 * of its disks, and the results gathered as they come in.
460 /* go out and get the dump estimates */
462 g_fprintf(stderr,_("\nGETTING ESTIMATES...\n"));
463 section_start = curclock();
465 estq.head = estq.tail = NULL;
466 pestq.head = pestq.tail = NULL;
467 waitq.head = waitq.tail = NULL;
468 failq.head = failq.tail = NULL;
472 g_fprintf(stderr, _("%s: time %s: getting estimates took %s secs\n"),
474 walltime_str(curclock()),
475 walltime_str(timessub(curclock(), section_start)));
478 * At this point, all disks with estimates are in estq, and
479 * all the disks on hosts that didn't respond to our inquiry
483 dump_queue("FAILED", failq, 15, stderr);
484 dump_queue("DONE", estq, 15, stderr);
488 * 6. Analyze Dump Estimates
490 * Each disk's estimates are looked at to determine what level it
491 * should dump at, and to calculate the expected size and time taking
492 * historical dump rates and compression ratios into account. The
493 * total expected size is accumulated as well.
496 g_fprintf(stderr,_("\nANALYZING ESTIMATES...\n"));
497 section_start = curclock();
499 /* an empty tape still has a label and an endmark */
500 total_size = ((off_t)tt_blocksize_kb + (off_t)tape_mark) * (off_t)2;
504 schedq.head = schedq.tail = NULL;
505 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
506 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
509 * At this point, all the disks are on schedq sorted by priority.
510 * The total estimated size of the backups is in total_size.
516 g_fprintf(stderr, _("INITIAL SCHEDULE (size %lld):\n"),
517 (long long)total_size);
518 for(dp = schedq.head; dp != NULL; dp = dp->next) {
519 qname = quote_string(dp->name);
520 g_fprintf(stderr, _(" %s %s pri %d lev %d nsize %lld csize %lld\n"),
521 dp->host->hostname, qname, est(dp)->dump_priority,
523 (long long)est(dp)->dump_nsize,
524 (long long)est(dp)->dump_csize);
531 * 7. Delay Dumps if Schedule Too Big
533 * If the generated schedule is too big to fit on the tape, we need to
534 * delay some full dumps to make room. Incrementals will be done
535 * instead (except for new or forced disks).
537 * In extreme cases, delaying all the full dumps is not even enough.
538 * If so, some low-priority incrementals will be skipped completely
539 * until the dumps fit on the tape.
542 g_fprintf(stderr, _("\nDELAYING DUMPS IF NEEDED, total_size %lld, tape length %lld mark %zu\n"),
543 (long long)total_size,
544 (long long)tape_length,
547 initial_size = total_size;
551 /* XXX - why bother checking this? */
552 if(empty(schedq) && total_size < initial_size) {
553 error(_("cannot fit anything on tape, bailing out"));
559 * 8. Promote Dumps if Schedule Too Small
561 * Amanda attempts to balance the full dumps over the length of the
562 * dump cycle. If this night's full dumps are too small relative to
563 * the other nights, promote some high-priority full dumps that will be
564 * due for the next run, to full dumps for tonight, taking care not to
565 * overflow the tape size.
567 * This doesn't work too well for small sites. For these we scan ahead
568 * looking for nights that have an excessive number of dumps and promote
571 * Amanda never delays full dumps just for the sake of balancing the
572 * schedule, so it can take a full cycle to balance the schedule after
577 _("\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n"),
578 total_lev0, balanced_size);
580 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
582 while((balanced_size - total_lev0) > balance_threshold && moved_one)
583 moved_one = promote_highest_priority_incremental();
585 moved_one = promote_hills();
587 g_fprintf(stderr, _("%s: time %s: analysis took %s secs\n"),
589 walltime_str(curclock()),
590 walltime_str(timessub(curclock(), section_start)));
596 * The schedule goes to stdout, presumably to driver. A copy is written
597 * on stderr for the debug file.
600 g_fprintf(stderr,_("\nGENERATING SCHEDULE:\n--------\n"));
602 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
603 g_fprintf(stderr, _("--------\n"));
606 log_add(L_FINISH, _("date %s time %s"), planner_timestamp, walltime_str(curclock()));
609 amfree(planner_timestamp);
610 amfree(our_feature_string);
611 am_release_feature_set(our_features);
622 * ========================================================================
623 * SETUP FOR ESTIMATES
627 static void askfor(est_t *, int, int, info_t *);
628 static int last_level(info_t *info); /* subroutines */
629 static off_t est_size(disk_t *dp, int level);
630 static off_t est_tape_size(disk_t *dp, int level);
631 static int next_level0(disk_t *dp, info_t *info);
632 static int runs_at(info_t *info, int lev);
633 static off_t bump_thresh(int level, off_t size_level_0, int bumppercent, off_t bumpsize, double bumpmult);
634 static int when_overwrite(char *label);
637 est_t *ep, /* estimate data block */
638 int seq, /* sequence number of request; indexes the MAX_LEVELS slot arrays */
639 int lev, /* dump level being requested; -1 is accepted by the range check below */
640 info_t *info) /* info block for disk */
642 if(seq < 0 || seq >= MAX_LEVELS) {
643 error(_("error [planner askfor: seq out of range 0..%d: %d]"),
647 if(lev < -1 || lev >= DUMP_LEVELS) {
648 error(_("error [planner askfor: lev out of range -1..%d: %d]"),
655 ep->dumpdate[seq] = (char *)0;
656 ep->est_size[seq] = (off_t)-2; /* -2: no estimate result recorded yet */
660 ep->level[seq] = lev;
662 ep->dumpdate[seq] = stralloc(get_dumpdate(info,lev)); /* slot keeps its own copy of the dumpdate string */
664 ep->est_size[seq] = (off_t)-2; /* -2: no estimate result recorded yet */
679 assert(dp && dp->host);
681 qname = quote_string(dp->name);
682 g_fprintf(stderr, _("%s: time %s: setting up estimates for %s:%s\n"),
683 get_pname(), walltime_str(curclock()),
684 dp->host->hostname, qname);
686 /* get current information about disk */
688 if(get_info(dp->host->hostname, dp->name, &info)) {
689 /* no record for this disk, make a note of it */
690 log_add(L_INFO, _("Adding new disk %s:%s."), dp->host->hostname, dp->name);
693 /* setup working data struct for disk */
695 ep = alloc(SIZEOF(est_t));
696 dp->up = (void *) ep;
697 ep->state = DISK_READY;
698 ep->dump_nsize = (off_t)-1;
699 ep->dump_csize = (off_t)-1;
700 ep->dump_priority = dp->priority;
704 /* calculated fields */
706 if (ISSET(info.command, FORCE_FULL)) {
707 /* force a level 0, kind of like a new disk */
708 if(dp->strategy == DS_NOFULL) {
710 * XXX - Not sure what it means to force a no-full disk. The
711 * purpose of no-full is to just dump changes relative to a
712 * stable base, for example root partitions that vary only
713 * slightly from a site-wide prototype. Only the variations
716 * If we allow a level 0 onto the Amanda cycle, then we are
717 * hosed when that tape gets re-used next. Disallow this for
721 _("Cannot force full dump of %s:%s with no-full option."),
722 dp->host->hostname, qname);
724 /* clear force command */
725 CLR(info.command, FORCE_FULL);
726 if(put_info(dp->host->hostname, dp->name, &info)) {
727 error(_("could not put info record for %s:%s: %s"),
728 dp->host->hostname, qname, strerror(errno));
731 ep->last_level = last_level(&info);
732 ep->next_level0 = next_level0(dp, &info);
736 ep->next_level0 = -conf_dumpcycle;
737 log_add(L_INFO, _("Forcing full dump of %s:%s as directed."),
738 dp->host->hostname, qname);
741 else if(dp->strategy == DS_NOFULL) {
742 /* force estimate of level 1 */
744 ep->next_level0 = next_level0(dp, &info);
747 ep->last_level = last_level(&info);
748 ep->next_level0 = next_level0(dp, &info);
751 /* adjust priority levels */
753 /* warn if dump will be overwritten */
754 if (ep->last_level > -1 && strlen(info.inf[0].label) > 0) {
755 overwrite_runs = when_overwrite(info.inf[0].label);
756 if(overwrite_runs == 0) {
757 log_add(L_WARNING, _("Last full dump of %s:%s "
758 "on tape %s overwritten on this run."),
759 dp->host->hostname, qname, info.inf[0].label);
760 } else if(overwrite_runs <= RUNS_REDZONE) {
762 plural(_("Last full dump of %s:%s on tape %s overwritten in %d run."),
763 _("Last full dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
764 dp->host->hostname, qname, info.inf[0].label,
769 /* warn if last level 1 will be overwritten */
770 if (ep->last_level > 1 && strlen(info.inf[1].label) > 0) {
771 overwrite_runs = when_overwrite(info.inf[1].label);
772 if(overwrite_runs == 0) {
773 log_add(L_WARNING, _("Last level 1 dump of %s:%s "
774 "on tape %s overwritten on this run, resetting to level 1"),
775 dp->host->hostname, qname, info.inf[1].label);
777 } else if(overwrite_runs <= RUNS_REDZONE) {
779 plural(_("Last level 1 dump of %s:%s on tape %s overwritten in %d run."),
780 _("Last level 1 dump of %s:%s on tape %s overwritten in %d runs."), overwrite_runs),
781 dp->host->hostname, qname, info.inf[1].label,
786 if(ep->next_level0 < 0) {
787 g_fprintf(stderr,plural(_("%s:%s overdue %d day for level 0\n"),
788 _("%s:%s overdue %d days for level 0\n"),
790 dp->host->hostname, qname, (-ep->next_level0));
791 ep->dump_priority -= ep->next_level0;
793 else if (ISSET(info.command, FORCE_FULL))
794 ep->dump_priority += 1;
795 /* else XXX bump up the priority of incrementals that failed last night */
797 /* handle external level 0 dumps */
799 if(dp->skip_full && dp->strategy != DS_NOINC) {
800 if(ep->next_level0 <= 0) {
801 /* update the date field */
802 info.inf[0].date = today;
803 CLR(info.command, FORCE_FULL);
804 ep->next_level0 += conf_dumpcycle;
806 if(put_info(dp->host->hostname, dp->name, &info)) {
807 error(_("could not put info record for %s:%s: %s"),
808 dp->host->hostname, qname, strerror(errno));
811 log_add(L_INFO, _("Skipping full dump of %s:%s today."),
812 dp->host->hostname, qname);
813 g_fprintf(stderr,_("%s:%s lev 0 skipped due to skip-full flag\n"),
814 dp->host->hostname, qname);
815 /* don't enqueue the disk */
816 askfor(ep, 0, -1, &info);
817 askfor(ep, 1, -1, &info);
818 askfor(ep, 2, -1, &info);
819 g_fprintf(stderr, _("%s: SKIPPED %s %s 0 [skip-full]\n"),
820 get_pname(), dp->host->hostname, qname);
821 log_add(L_SUCCESS, _("%s %s %s 0 [skipped: skip-full]"),
822 dp->host->hostname, qname, planner_timestamp);
827 if(ep->last_level == -1) {
828 /* probably a new disk, but skip-full means no full! */
832 if(ep->next_level0 == 1) {
833 log_add(L_WARNING, _("Skipping full dump of %s:%s tomorrow."),
834 dp->host->hostname, qname);
838 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info.command, FORCE_FULL)) {
839 /* don't enqueue the disk */
840 askfor(ep, 0, -1, &info);
841 askfor(ep, 1, -1, &info);
842 askfor(ep, 2, -1, &info);
843 log_add(L_FAIL, _("%s %s 19000101 1 [Skipping incronly because no full dump were done]"),
844 dp->host->hostname, qname);
845 g_fprintf(stderr,_("%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n"),
846 dp->host->hostname, qname);
851 /* handle "skip-incr" type archives */
853 if(dp->skip_incr && ep->next_level0 > 0) {
854 g_fprintf(stderr,_("%s:%s lev 1 skipped due to skip-incr flag\n"),
855 dp->host->hostname, qname);
856 /* don't enqueue the disk */
857 askfor(ep, 0, -1, &info);
858 askfor(ep, 1, -1, &info);
859 askfor(ep, 2, -1, &info);
861 g_fprintf(stderr, _("%s: SKIPPED %s %s 1 [skip-incr]\n"),
862 get_pname(), dp->host->hostname, qname);
864 log_add(L_SUCCESS, _("%s %s %s 1 [skipped: skip-incr]"),
865 dp->host->hostname, qname, planner_timestamp);
870 if( ep->last_level == -1 && ep->next_level0 > 0 &&
871 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
872 conf_reserve == 100) {
873 log_add(L_WARNING, _("%s:%s mismatch: no tapelist record, "
874 "but curinfo next_level0: %d."),
875 dp->host->hostname, qname, ep->next_level0);
879 if(ep->last_level == 0) ep->level_days = 0;
880 else ep->level_days = runs_at(&info, ep->last_level);
881 ep->last_lev0size = info.inf[0].csize;
883 ep->fullrate = perf_average(info.full.rate, 0.0);
884 ep->incrrate = perf_average(info.incr.rate, 0.0);
886 ep->fullcomp = perf_average(info.full.comp, dp->comprate[0]);
887 ep->incrcomp = perf_average(info.incr.comp, dp->comprate[1]);
889 /* determine which estimates to get */
893 if (dp->strategy == DS_NOINC ||
895 (!ISSET(info.command, FORCE_BUMP) ||
897 ep->last_level == -1))) {
898 if(info.command & FORCE_BUMP && ep->last_level == -1) {
900 _("Remove force-bump command of %s:%s because it's a new disk."),
901 dp->host->hostname, qname);
903 switch (dp->strategy) {
906 askfor(ep, i++, 0, &info);
908 log_add(L_INFO, _("Ignoring skip_full for %s:%s "
909 "because the strategy is NOINC."),
910 dp->host->hostname, qname);
912 if(info.command & FORCE_BUMP) {
914 _("Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC."),
915 dp->host->hostname, qname);
924 if (ISSET(info.command, FORCE_FULL))
930 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
931 if(ep->last_level == -1) { /* a new disk */
932 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
933 askfor(ep, i++, 1, &info);
935 assert(!dp->skip_full); /* should be handled above */
937 } else { /* not new, pick normally */
940 curr_level = ep->last_level;
942 if (ISSET(info.command, FORCE_NO_BUMP)) {
943 if(curr_level > 0) { /* level 0 already asked for */
944 askfor(ep, i++, curr_level, &info);
946 log_add(L_INFO,_("Preventing bump of %s:%s as directed."),
947 dp->host->hostname, qname);
948 } else if (ISSET(info.command, FORCE_BUMP)
949 && curr_level + 1 < DUMP_LEVELS) {
950 askfor(ep, i++, curr_level+1, &info);
951 log_add(L_INFO,_("Bumping of %s:%s at level %d as directed."),
952 dp->host->hostname, qname, curr_level+1);
953 } else if (curr_level == 0) {
954 askfor(ep, i++, 1, &info);
956 askfor(ep, i++, curr_level, &info);
958 * If last time we dumped less than the threshold, then this
959 * time we will too, OR the extra size will be charged to both
960 * curr_level and curr_level + 1, so we will never bump. Also,
961 * if we haven't been at this level 2 days, or the dump failed
962 * last night, we can't bump.
964 if((info.inf[curr_level].size == (off_t)0 || /* no data, try it anyway */
965 (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
966 && ep->level_days >= dp->bumpdays))
967 && curr_level + 1 < DUMP_LEVELS) {
968 askfor(ep, i++, curr_level+1, &info);
974 while(i < MAX_LEVELS) /* mark end of estimates */
975 askfor(ep, i++, -1, &info);
979 g_fprintf(stderr, _("setup_estimate: %s:%s: command %u, options: %s "
980 "last_level %d next_level0 %d level_days %d getting estimates "
981 "%d (%lld) %d (%lld) %d (%lld)\n"),
982 dp->host->hostname, qname, info.command,
983 dp->strategy == DS_NOFULL ? "no-full" :
984 dp->strategy == DS_INCRONLY ? "incr-only" :
985 dp->skip_full ? "skip-full" :
986 dp->skip_incr ? "skip-incr" : "none",
987 ep->last_level, ep->next_level0, ep->level_days,
988 ep->level[0], (long long)ep->est_size[0],
989 ep->level[1], (long long)ep->est_size[1],
990 ep->level[2], (long long)ep->est_size[2]);
992 assert(ep->level[0] != -1);
993 enqueue_disk(&startq, dp);
997 static int when_overwrite(
1003 runtapes = conf_runtapes;
1004 if(runtapes == 0) runtapes = 1; /* runtapes is used as a divisor below; never divide by zero */
1006 if((tp = lookup_tapelabel(label)) == NULL)
1007 return 1; /* "shouldn't happen", but trigger warning message */
1008 else if(tp->reuse == 0)
1010 else if(lookup_nb_tape() > conf_tapecycle)
1011 return (lookup_nb_tape() - tp->position) / runtapes; /* more tapes than tapecycle: measure from the tape count, in units of runs */
1013 return (conf_tapecycle - tp->position) / runtapes; /* otherwise measure from tapecycle, in units of runs */
1016 /* Return the estimated size for a particular dump */
1017 static off_t est_size(
1023 for(i = 0; i < MAX_LEVELS; i++) {
1024 if(level == est(dp)->level[i])
1025 return est(dp)->est_size[i];
1030 /* Return the estimated on-tape size of a particular dump */
1031 static off_t est_tape_size(
1038 size = est_size(dp, level);
1040 if(size == (off_t)-1) return size;
1042 if(dp->compress == COMP_NONE)
1045 if(level == 0) ratio = est(dp)->fullcomp;
1046 else ratio = est(dp)->incrcomp;
1049 * make sure over-inflated compression ratios don't throw off the
1050 * estimates, this is mostly for when you have a small dump getting
1051 * compressed which takes up a lot more disk/tape space relatively due
1052 * to the overhead of the compression. This is specifically for
1053 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1054 * (RUG@USM.Uni-Muenchen.DE)
1057 if(ratio > 1.1) ratio = 1.1;
1059 size = (off_t)((double)size * ratio);
1062 * Ratio can be very small in some error situations, so make sure
1063 * size goes back greater than zero. It may not be right, but
1064 * indicates we did get an estimate.
1066 if(size <= (off_t)0) {
1074 /* what was the level of the last successful dump to tape? */
1075 static int last_level(
1078 int min_pos, min_level, i;
1079 time_t lev0_date, last_date;
1082 if(info->last_level != -1)
1083 return info->last_level;
1085 /* to keep compatibility with old infofile */
1086 min_pos = 1000000000;
1090 for(i = 0; i < 9; i++) {
1091 if(conf_reserve < 100) {
1092 if(i == 0) lev0_date = info->inf[0].date;
1093 else if(info->inf[i].date < lev0_date) continue;
1094 if(info->inf[i].date > last_date) {
1095 last_date = info->inf[i].date;
1100 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1101 /* cull any entries from previous cycles */
1102 if(i == 0) lev0_date = info->inf[0].date;
1103 else if(info->inf[i].date < lev0_date) continue;
1105 if(tp->position < min_pos) {
1106 min_pos = tp->position;
1111 info->last_level = i;
1115 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc. */
1121 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1122 return 1; /* fake it */
1123 else if (dp->strategy == DS_NOINC)
1125 else if(info->inf[0].date < (time_t)0)
1126 return -days_diff(EPOCH, today); /* new disk */
1128 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1131 /* how many runs at current level? */
1136 tape_t *cur_tape, *old_tape;
1139 last = last_level(info);
1140 if(lev != last) return 0;
1141 if(lev == 0) return 1;
1143 if(info->consecutive_runs != -1)
1144 return info->consecutive_runs;
1146 /* to keep compatibility with old infofile */
1147 cur_tape = lookup_tapelabel(info->inf[lev].label);
1148 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1149 if(cur_tape == NULL || old_tape == NULL) return 0;
1151 if(conf_runtapes == 0)
1152 nb_runs = (old_tape->position - cur_tape->position) / 1;
1154 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1155 info->consecutive_runs = nb_runs;
1161 static off_t bump_thresh(
1170 if ((bumppercent != 0) && (size_level_0 > (off_t)1024)) { /* percentage form only when bumppercent is set and the level 0 size exceeds 1024 */
1171 bump = ((double)size_level_0 * (double)bumppercent) / 100.0; /* bumppercent percent of the level 0 size */
1174 bump = (double)bumpsize; /* presumably the fallback branch: use the fixed bumpsize */
1176 while(--level) bump = bump * bumpmult; /* multiplies by bumpmult (level - 1) times; assumes level >= 1 -- TODO confirm for all callers */
1184 * ========================================================================
1185 * GET REMOTE DUMP SIZE ESTIMATES
1189 static void getsize(am_host_t *hostp);
1190 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
1191 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
1194 static void get_estimates(void)
1198 int something_started;
1200 something_started = 1;
1201 while(something_started) {
1202 something_started = 0;
1203 for(dp = startq.head; dp != NULL; dp = dp->next) {
1205 if(hostp->up == HOST_READY) {
1206 something_started = 1;
1210 * dp is no longer on startq, so dp->next is not valid
1211 * and we have to start all over.
1219 while(!empty(waitq)) {
1220 disk_t *dp = dequeue_disk(&waitq);
1221 est(dp)->errstr = _("hmm, disk was stranded on waitq");
1222 enqueue_disk(&failq, dp);
1225 while(!empty(pestq)) {
1226 disk_t *dp = dequeue_disk(&pestq);
1227 char * qname = quote_string(dp->name);
1229 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1230 if(est(dp)->est_size[0] == (off_t)-1) {
1231 log_add(L_WARNING, _("disk %s:%s, estimate of level %d failed."),
1232 dp->host->hostname, qname, est(dp)->level[0]);
1236 _("disk %s:%s, estimate of level %d timed out."),
1237 dp->host->hostname, qname, est(dp)->level[0]);
1239 est(dp)->level[0] = -1;
1242 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1243 if(est(dp)->est_size[1] == (off_t)-1) {
1245 _("disk %s:%s, estimate of level %d failed."),
1246 dp->host->hostname, qname, est(dp)->level[1]);
1250 _("disk %s:%s, estimate of level %d timed out."),
1251 dp->host->hostname, qname, est(dp)->level[1]);
1253 est(dp)->level[1] = -1;
1256 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1257 if(est(dp)->est_size[2] == (off_t)-1) {
1259 _("disk %s:%s, estimate of level %d failed."),
1260 dp->host->hostname, qname, est(dp)->level[2]);
1264 _("disk %s:%s, estimate of level %d timed out."),
1265 dp->host->hostname, qname, est(dp)->level[2]);
1267 est(dp)->level[2] = -1;
1270 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1271 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1272 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1273 enqueue_disk(&estq, dp);
1276 est(dp)->errstr = vstralloc("disk ", qname,
1277 _(", all estimate timed out"), NULL);
1278 enqueue_disk(&failq, dp);
/*
 * getsize: build and send one estimate-related request to a host.
 *
 * Nothing is sent unless the host is in the HOST_READY state.  When the
 * host feature list is already known, a "sendsize" request is assembled:
 * an OPTIONS part chosen from the features the client advertises, then one
 * line per requested level for every ready disk that uses a CLIENT or
 * CALCSIZE estimate.  Disks that use a SERVER estimate are not sent to the
 * client; their sizes are taken from the history returned by get_info()
 * and the disk is moved from startq to estq right away.
 *
 * When the feature list is still unknown, a "noop" request is sent instead
 * so that the reply can supply the client features.
 *
 * The request is handed to protocol_sendreq() with handle_result as the
 * callback and hostp as its argument.
 */
1284 static void getsize(
1287 char number[NUM_STR_SIZE], *req;
1290 time_t estimates, timeout;
1292 const security_driver_t *secdrv;
1298 assert(hostp->disks != NULL);
1300 if(hostp->up != HOST_READY) {
1305 * The first time through here we send a "noop" request. This will
1306 * return the feature list from the client if it supports that.
1307 * If it does not, handle_result() will set the feature list to an
1308 * empty structure. In either case, we do the disks on the second
1309 * (and subsequent) pass(es).
1311 if(hostp->features != NULL) { /* sendsize service */
1315 int has_features = am_has_feature(hostp->features,
1316 fe_req_options_features);
1317 int has_hostname = am_has_feature(hostp->features,
1318 fe_req_options_hostname);
1319 int has_maxdumps = am_has_feature(hostp->features,
1320 fe_req_options_maxdumps);
1321 int has_config = am_has_feature(hostp->features,
1322 fe_req_options_config);
1324 g_snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
/* each option is emitted only when the client advertises support for it */
1325 req = vstralloc("SERVICE ", "sendsize", "\n",
1327 has_features ? "features=" : "",
1328 has_features ? our_feature_string : "",
1329 has_features ? ";" : "",
1330 has_maxdumps ? "maxdumps=" : "",
1331 has_maxdumps ? number : "",
1332 has_maxdumps ? ";" : "",
1333 has_hostname ? "hostname=" : "",
1334 has_hostname ? hostp->hostname : "",
1335 has_hostname ? ";" : "",
1336 has_config ? "config=" : "",
1337 has_config ? config_name : "",
1338 has_config ? ";" : "",
1341 req_len = strlen(req);
1342 req_len += 128; /* room for SECURITY ... */
1344 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1348 if(dp->todo == 0) continue;
1350 if(est(dp)->state != DISK_READY) continue;
1352 est(dp)->got_estimate = 0;
/* no level was requested for this disk, so it is marked done at once */
1353 if(est(dp)->level[0] == -1) {
1354 est(dp)->state = DISK_DONE;
1358 qname = quote_string(dp->name);
1359 qdevice = quote_string(dp->device);
1360 if(dp->estimate == ES_CLIENT ||
1361 dp->estimate == ES_CALCSIZE) {
1364 for(i = 0; i < MAX_LEVELS; i++) {
1366 char *exclude1 = "";
1367 char *exclude2 = "";
1368 char *excludefree = NULL;
1369 char *include1 = "";
1370 char *include2 = "";
1371 char *includefree = NULL;
1372 char spindle[NUM_STR_SIZE];
1373 char level[NUM_STR_SIZE];
1374 int lev = est(dp)->level[i];
1376 if(lev == -1) break;
1378 g_snprintf(level, SIZEOF(level), "%d", lev);
1379 g_snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
1380 if(am_has_feature(hostp->features,fe_sendsize_req_options)){
1381 exclude1 = " OPTIONS |";
1382 exclude2 = optionstr(dp, hostp->features, NULL);
1383 if ( exclude2 == NULL ) {
1384 error(_("problem with option string, check the dumptype definition.\n"));
1386 excludefree = exclude2;
1390 if(dp->exclude_file &&
1391 dp->exclude_file->nb_element == 1) {
1392 exclude1 = " exclude-file=";
1394 quote_string(dp->exclude_file->first->name);
1395 excludefree = exclude2;
1397 else if(dp->exclude_list &&
1398 dp->exclude_list->nb_element == 1) {
1399 exclude1 = " exclude-list=";
1401 quote_string(dp->exclude_list->first->name);
1402 excludefree = exclude2;
1404 if(dp->include_file &&
1405 dp->include_file->nb_element == 1) {
1406 include1 = " include-file=";
1408 quote_string(dp->include_file->first->name);
1409 includefree = include2;
1411 else if(dp->include_list &&
1412 dp->include_list->nb_element == 1) {
1413 include1 = " include-list=";
1415 quote_string(dp->include_list->first->name);
1416 includefree = include2;
/* a client without the calcsize feature gets a CLIENT estimate instead */
1420 if(dp->estimate == ES_CALCSIZE &&
1421 !am_has_feature(hostp->features, fe_calcsize_estimate)) {
1422 log_add(L_WARNING,_("%s:%s does not support CALCSIZE for estimate, using CLIENT.\n"),
1423 hostp->hostname, qname);
1424 dp->estimate = ES_CLIENT;
1426 if(dp->estimate == ES_CLIENT)
1429 calcsize = "CALCSIZE ";
1431 if(strcmp(dp->program,"DUMP") == 0 ||
1432 strcmp(dp->program,"GNUTAR") == 0) {
1435 backup_api = "BACKUP ";
1437 l = vstralloc(calcsize,
1441 " ", dp->device ? qdevice : "",
1443 " ", est(dp)->dumpdate[i],
1445 " ", exclude1, exclude2,
1446 ((includefree != NULL) ? " " : ""),
1453 amfree(includefree);
1454 amfree(excludefree);
1462 est(dp)->state = DISK_ACTIVE;
1463 remove_disk(&startq, dp);
/* server-side estimate: sizes come from the history kept in info */
1465 else if (dp->estimate == ES_SERVER) {
1468 get_info(dp->host->hostname, dp->name, &info);
1469 for(i = 0; i < MAX_LEVELS; i++) {
1471 int lev = est(dp)->level[i];
1473 if(lev == -1) break;
1474 if(lev == 0) { /* use latest level 0, should do extrapolation */
1475 off_t est_size = (off_t)0;
1478 for(j=NB_HISTORY-2;j>=0;j--) {
1479 if(info.history[j].level == 0) {
1480 if(info.history[j].size < (off_t)0) continue;
1481 est_size = info.history[j].size;
1486 est(dp)->est_size[i] = est_size;
1488 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1489 est(dp)->est_size[i] = info.inf[lev].size;
1492 est(dp)->est_size[i] = (off_t)1000000;
1495 else if(lev == est(dp)->last_level) {
1496 /* means of all X day at the same level */
1499 off_t est_size_day[NB_DAY];
1500 int nb_est_day[NB_DAY];
1501 for(j=0;j<NB_DAY;j++) {
1502 est_size_day[j]=(off_t)0;
1506 for(j=NB_HISTORY-2;j>=0;j--) {
1507 if(info.history[j].level <= 0) continue;
1508 if(info.history[j].size < (off_t)0) continue;
1509 if(info.history[j].level==info.history[j+1].level) {
1510 if(nb_day <NB_DAY-1) nb_day++;
1511 est_size_day[nb_day] += info.history[j].size;
1512 nb_est_day[nb_day]++;
1518 nb_day = info.consecutive_runs + 1;
1519 if(nb_day > NB_DAY-1) nb_day = NB_DAY-1;
1521 while(nb_day > 0 && nb_est_day[nb_day] == 0) nb_day--;
1523 if(nb_est_day[nb_day] > 0) {
1524 est(dp)->est_size[i] = est_size_day[nb_day] /
1525 (off_t)nb_est_day[nb_day];
1527 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1528 est(dp)->est_size[i] = info.inf[lev].size;
1531 est(dp)->est_size[i] = (off_t)10000;
1534 else if(lev == est(dp)->last_level + 1) {
1535 /* means of all first day at a new level */
1536 off_t est_size = (off_t)0;
1539 for(j=NB_HISTORY-2;j>=0;j--) {
1540 if(info.history[j].level <= 0) continue;
1541 if(info.history[j].size < (off_t)0) continue;
1542 if(info.history[j].level == info.history[j+1].level + 1 ) {
1543 est_size += info.history[j].size;
1548 est(dp)->est_size[i] = est_size / (off_t)nb_est;
1550 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1551 est(dp)->est_size[i] = info.inf[lev].size;
1554 est(dp)->est_size[i] = (off_t)100000;
1558 g_fprintf(stderr,_("%s time %s: got result for host %s disk %s:"),
1559 get_pname(), walltime_str(curclock()),
1560 dp->host->hostname, qname);
1561 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1562 est(dp)->level[0], (long long)est(dp)->est_size[0],
1563 est(dp)->level[1], (long long)est(dp)->est_size[1],
1564 est(dp)->level[2], (long long)est(dp)->est_size[2]);
1565 est(dp)->state = DISK_DONE;
1566 remove_disk(&startq, dp);
1567 enqueue_disk(&estq, dp);
1573 if(estimates == 0) {
1575 hostp->up = HOST_DONE;
/* a negative etimeout is used as an absolute value; otherwise it is
 * multiplied by the number of estimates requested */
1579 if (conf_etimeout < 0) {
1580 timeout = - conf_etimeout;
1582 timeout = estimates * conf_etimeout;
1584 } else { /* noop service */
1585 req = vstralloc("SERVICE ", "noop", "\n",
1587 "features=", our_feature_string, ";",
1591 * We use ctimeout for the "noop" request because it should be
1592 * very fast and etimeout has other side effects.
1594 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
1597 secdrv = security_getdriver(hostp->disks->security_driver);
1598 if (secdrv == NULL) {
1599 hostp->up = HOST_DONE;
1601 _("Could not find security driver '%s' for host '%s'"),
1602 hostp->disks->security_driver, hostp->hostname);
1606 hostp->up = HOST_ACTIVE;
/* disks in the DISK_ACTIVE state are parked on waitq until the reply */
1608 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1612 if(est(dp)->state == DISK_ACTIVE) {
1613 est(dp)->errstr = NULL;
1614 enqueue_disk(&waitq, dp);
1618 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1619 req, timeout, handle_result, hostp);
/*
 * lookup_hostdisk: walk the disk list of host hp (linked through hostnext)
 * and return the first disk whose name compares equal to str.
 */
1624 static disk_t *lookup_hostdisk(
1625 /*@keep@*/ am_host_t *hp,
1630 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1631 if(strcmp(str, dp->name) == 0) return dp;
/*
 * handle_result: protocol callback for the requests sent by getsize().
 *
 * datap is the am_host_t the request was sent to; the host is put back in
 * the HOST_READY state first.  The reply body is then read line by line:
 *   - an "OPTIONS " line may carry "features=", which replaces the host
 *     feature set;
 *   - an "ERROR " line is fatal for the whole reply, unless it only says
 *     that the "noop" service is unknown (an old client);
 *   - any other line names a disk and a level followed by either
 *     "SIZE <n>" or "ERROR <message>", and is recorded in the estimate
 *     slot of the disk that was requested at that level.
 *
 * Afterwards every disk of the host that was waiting for this reply is
 * moved to pestq (partial reply, P_PREP), estq (at least one positive
 * estimate) or failq (no usable estimate).  On a parse or transport error
 * every active disk of the host is failed with errbuf as its error string
 * and the host is marked HOST_DONE.
 */
1637 static void handle_result(
1640 security_handle_t *sech)
1646 char *msg, msg_undo;
1647 char *remoterr, *errbuf = NULL;
1658 hostp = (am_host_t *)datap;
1659 hostp->up = HOST_READY;
1662 errbuf = vstrallocf(_("Request to %s failed: %s"),
1663 hostp->hostname, security_geterror(sech));
1666 if (pkt->type == P_NAK) {
1668 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1671 goto NAK_parse_failed;
1673 skip_whitespace(s, ch);
1674 if(ch == '\0') goto NAK_parse_failed;
1676 if((s = strchr(remoterr, '\n')) != NULL) {
1677 if(s == remoterr) goto NAK_parse_failed;
1680 if (strcmp(remoterr, "unknown service: noop") != 0
1681 && strcmp(remoterr, "noop: invalid service") != 0) {
1682 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
1693 if(strncmp_const(line, "OPTIONS ") == 0) {
1694 t = strstr(line, "features=");
1695 if(t != NULL && (isspace((int)t[-1]) || t[-1] == ';')) {
1696 t += SIZEOF("features=")-1;
1697 am_release_feature_set(hostp->features);
1698 if((hostp->features = am_string_to_feature(t)) == NULL) {
/* vstrallocf() is printf-like: the hostname must be an argument, never
 * the format string itself */
1699 errbuf = vstrallocf(_("%s: bad features value: %s\n"),
1700 hostp->hostname, line);
1704 skip_quoted_line(s, ch);
1709 if(strncmp_const_skip(t, "ERROR ", t, tch) == 0) {
1711 skip_whitespace(t, tch);
1717 * If the "error" is that the "noop" service is unknown, it
1718 * just means the client is "old" (does not support the service).
1719 * We can ignore this.
1721 if(hostp->features == NULL
1722 && pkt->type == P_NAK
1723 && (strcmp(t - 1, "unknown service: noop") == 0
1724 || strcmp(t - 1, "noop: invalid service") == 0)) {
1725 skip_quoted_line(s, ch);
1728 errbuf = vstralloc(hostp->hostname,
1729 (pkt->type == P_NAK) ? "NAK " : "",
1738 skip_quoted_string(t, tch);
1740 disk = unquote_string(msg);
1742 skip_whitespace(t, tch);
1744 if (sscanf(t - 1, "%d", &level) != 1) {
1748 skip_integer(t, tch);
1749 skip_whitespace(t, tch);
/* one lookup is enough; the reply is rejected when the disk is unknown */
1751 dp = lookup_hostdisk(hostp, disk);
1754 log_add(L_ERROR, _("%s: invalid reply from sendsize: `%s'\n"),
1755 hostp->hostname, line);
1760 if (strncmp_const(t-1,"SIZE ") == 0) {
1761 if (sscanf(t - 1, "SIZE %lld", &size_) != 1) {
1764 size = (off_t)size_;
1765 } else if (strncmp_const(t-1,"ERROR ") == 0) {
1766 skip_non_whitespace(t, tch);
1767 skip_whitespace(t, tch);
1769 skip_quoted_string(t,tch);
1772 if (pkt->type == P_REP) {
1773 est(dp)->errstr = unquote_string(msg);
/* store the size in the slot of the level it was requested for */
1782 if (size > (off_t)-1) {
1783 for(i = 0; i < MAX_LEVELS; i++) {
1784 if(est(dp)->level[i] == level) {
1785 est(dp)->est_size[i] = size;
1789 if(i == MAX_LEVELS) {
1790 goto bad_msg; /* this est wasn't requested */
1792 est(dp)->got_estimate++;
1796 skip_quoted_line(s, ch);
1799 if(hostp->up == HOST_READY && hostp->features == NULL) {
1801 * The client does not support the features list, so give it an
1804 dbprintf(_("no feature set from host %s\n"), hostp->hostname);
1805 hostp->features = am_set_default_feature_set();
1808 security_close_connection(sech, hostp->hostname);
1810 /* XXX what about disks that only got some estimates... do we care? */
1811 /* XXX amanda 2.1 treated that case as a bad msg */
1813 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1814 if(dp->todo == 0) continue;
1815 if(est(dp)->state != DISK_ACTIVE &&
1816 est(dp)->state != DISK_PARTIALY_DONE) continue;
1818 if(est(dp)->state == DISK_ACTIVE) {
1819 remove_disk(&waitq, dp);
1821 else if(est(dp)->state == DISK_PARTIALY_DONE) {
1822 remove_disk(&pestq, dp);
1825 if(pkt->type == P_REP) {
1826 est(dp)->state = DISK_DONE;
1828 else if(pkt->type == P_PREP) {
1829 est(dp)->state = DISK_PARTIALY_DONE;
1832 if(est(dp)->level[0] == -1) continue; /* ignore this disk */
1835 qname = quote_string(dp->name);
1836 if(pkt->type == P_PREP) {
1837 g_fprintf(stderr,_("%s: time %s: got partial result for host %s disk %s:"),
1838 get_pname(), walltime_str(curclock()),
1839 dp->host->hostname, qname);
1840 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1841 est(dp)->level[0], (long long)est(dp)->est_size[0],
1842 est(dp)->level[1], (long long)est(dp)->est_size[1],
1843 est(dp)->level[2], (long long)est(dp)->est_size[2]);
1844 enqueue_disk(&pestq, dp);
1846 else if(pkt->type == P_REP) {
1847 g_fprintf(stderr,_("%s: time %s: got result for host %s disk %s:"),
1848 get_pname(), walltime_str(curclock()),
1849 dp->host->hostname, qname);
1850 g_fprintf(stderr,_(" %d -> %lldK, %d -> %lldK, %d -> %lldK\n"),
1851 est(dp)->level[0], (long long)est(dp)->est_size[0],
1852 est(dp)->level[1], (long long)est(dp)->est_size[1],
1853 est(dp)->level[2], (long long)est(dp)->est_size[2]);
/* one positive estimate is enough to schedule the disk; levels whose
 * estimate is negative are dropped with a warning */
1854 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1855 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1856 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1858 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1860 _("disk %s:%s, estimate of level %d failed."),
1861 dp->host->hostname, qname, est(dp)->level[2]);
1862 est(dp)->level[2] = -1;
1864 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1866 _("disk %s:%s, estimate of level %d failed."),
1867 dp->host->hostname, qname,
1869 est(dp)->level[1] = -1;
1871 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1873 _("disk %s:%s, estimate of level %d failed."),
1874 dp->host->hostname, qname, est(dp)->level[0]);
1875 est(dp)->level[0] = -1;
1877 enqueue_disk(&estq, dp);
1880 enqueue_disk(&failq, dp);
1881 if(est(dp)->got_estimate) {
1882 est(dp)->errstr = vstrallocf("disk %s, all estimate failed",
1887 _("error result for host %s disk %s: missing estimate\n"),
1888 dp->host->hostname, qname);
1889 if (est(dp)->errstr == NULL) {
1890 est(dp)->errstr = vstrallocf(_("missing result for %s in %s response"),
1891 qname, dp->host->hostname);
1899 /* try to clean up any defunct processes, since Amanda doesn't wait() for its children. */
1901 while(waitpid(-1, NULL, WNOHANG)> 0);
1906 errbuf = vstrallocf(_("%s NAK: [NAK parse failed]"), hostp->hostname);
1907 g_fprintf(stderr, _("got strange nak from %s:\n----\n%s----\n\n"),
1908 hostp->hostname, pkt->body);
1912 g_fprintf(stderr,_("got a bad message, stopped at:\n"));
1914 g_fprintf(stderr,_("----\n%s----\n\n"), line);
1915 errbuf = stralloc2(_("badly formatted response from "), hostp->hostname);
/* error path: every disk still waiting on this host fails with errbuf */
1920 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1922 if(est(dp)->state == DISK_ACTIVE) {
1923 qname = quote_string(dp->name);
1924 est(dp)->state = DISK_DONE;
1925 remove_disk(&waitq, dp);
1926 enqueue_disk(&failq, dp);
1929 est(dp)->errstr = stralloc(errbuf);
1930 g_fprintf(stderr, _("error result for host %s disk %s: %s\n"),
1931 dp->host->hostname, qname, errbuf);
1938 * If there were no disks involved, make sure the error gets
1941 log_add(L_ERROR, "%s", errbuf);
1943 hostp->up = HOST_DONE;
1945 /* try to clean up any defunct processes, since Amanda doesn't wait() for its children. */
1947 while(waitpid(-1, NULL, WNOHANG)> 0);
1954 * ========================================================================
1959 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
1960 static int pick_inclevel(disk_t *dp);
/*
 * analyze_estimate: turn the estimates of one disk into a scheduled dump.
 *
 * When the disk is due for a level 0 (next_level0 <= 0), or its last level
 * was 0 and the FORCE_NO_BUMP command is set, the level 0 estimate is used
 * and an incremental level is prepared as the degraded-mode fallback
 * (degr_level, degr_nsize, degr_csize).  Otherwise an incremental level is
 * picked with pick_inclevel(), with several fallback levels tried while the
 * estimate is missing.
 *
 * The disk is then inserted in schedq ordered by schedule_order, and the
 * running totals total_size, total_lev0 and balanced_size are updated.
 */
1962 static void analyze_estimate(
1968 char *qname = quote_string(dp->name);
1972 g_fprintf(stderr, _("pondering %s:%s... "),
1973 dp->host->hostname, qname);
1974 g_fprintf(stderr, _("next_level0 %d last_level %d "),
1975 ep->next_level0, ep->last_level);
1977 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
/* start with no degraded-mode alternative */
1981 ep->degr_level = -1;
1982 ep->degr_nsize = (off_t)-1;
1983 ep->degr_csize = (off_t)-1;
1985 if(ep->next_level0 <= 0 || (have_info && ep->last_level == 0
1986 && (info.command & FORCE_NO_BUMP))) {
1987 if(ep->next_level0 <= 0) {
1988 g_fprintf(stderr,_("(due for level 0) "));
1991 ep->dump_nsize = est_size(dp, 0);
1992 ep->dump_csize = est_tape_size(dp, 0);
1993 if(ep->dump_csize <= (off_t)0) {
1995 _("(no estimate for level 0, picking an incr level)\n"));
1996 ep->dump_level = pick_inclevel(dp);
1997 ep->dump_nsize = est_size(dp, ep->dump_level);
1998 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2000 if(ep->dump_nsize == (off_t)-1) {
2001 ep->dump_level = ep->dump_level + 1;
2002 ep->dump_nsize = est_size(dp, ep->dump_level);
2003 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2007 total_lev0 += (double) ep->dump_csize;
2008 if(ep->last_level == -1 || dp->skip_incr) {
2009 g_fprintf(stderr,_("(%s disk, can't switch to degraded mode)\n"),
2010 dp->skip_incr? "skip-incr":_("new"));
2011 ep->degr_level = -1;
2012 ep->degr_nsize = (off_t)-1;
2013 ep->degr_csize = (off_t)-1;
2016 /* fill in degraded mode info */
2017 g_fprintf(stderr,_("(picking inclevel for degraded mode)"));
2018 ep->degr_level = pick_inclevel(dp);
2019 ep->degr_nsize = est_size(dp, ep->degr_level);
2020 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2021 if(ep->degr_csize == (off_t)-1) {
2022 ep->degr_level = ep->degr_level + 1;
2023 ep->degr_nsize = est_size(dp, ep->degr_level);
2024 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2026 if(ep->degr_csize == (off_t)-1) {
2027 g_fprintf(stderr,_("(no inc estimate)"));
2028 ep->degr_level = -1;
2030 g_fprintf(stderr,"\n");
2035 g_fprintf(stderr,_("(not due for a full dump, picking an incr level)\n"));
2036 /* XXX - if this returns -1 may be we should force a total? */
2037 ep->dump_level = pick_inclevel(dp);
2038 ep->dump_nsize = est_size(dp, ep->dump_level);
2039 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* no estimate at the picked level: fall back to last_level, then to
 * last_level + 1 */
2041 if(ep->dump_csize == (off_t)-1) {
2042 ep->dump_level = ep->last_level;
2043 ep->dump_nsize = est_size(dp, ep->dump_level);
2044 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2046 if(ep->dump_csize == (off_t)-1) {
2047 ep->dump_level = ep->last_level + 1;
2048 ep->dump_nsize = est_size(dp, ep->dump_level);
2049 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2051 if(ep->dump_csize == (off_t)-1) {
2053 ep->dump_nsize = est_size(dp, ep->dump_level);
2054 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2058 g_fprintf(stderr,_(" curr level %d nsize %lld csize %lld "),
2059 ep->dump_level, (long long)ep->dump_nsize,
2060 (long long)ep->dump_csize);
2062 insert_disk(&schedq, dp, schedule_order);
/* every scheduled dump also costs tt_blocksize_kb and one tape mark */
2064 total_size += (off_t)tt_blocksize_kb + ep->dump_csize + tape_mark;
2066 /* update the balanced size */
2067 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2068 dp->strategy == DS_INCRONLY)) {
2071 lev0size = est_tape_size(dp, 0);
2072 if(lev0size == (off_t)-1) lev0size = ep->last_lev0size;
2074 balanced_size += (double)(lev0size / (off_t)runs_per_cycle);
2077 g_fprintf(stderr,_("total size %lld total_lev0 %1.0lf balanced-lev0size %1.0lf\n"),
2078 (long long)total_size, total_lev0, balanced_size);
/*
 * handle_failed: report a disk for which no estimate could be obtained.
 *
 * The error string of the disk (or a placeholder when none was recorded)
 * is wrapped in square brackets and quoted, then written both to stderr as
 * a FAILED line and to the log with L_FAIL, together with the host name,
 * the quoted disk name and planner_timestamp.
 */
2082 static void handle_failed(
2085 char *errstr, *errstr1, *qerrstr;
2086 char *qname = quote_string(dp->name);
2088 errstr = est(dp)->errstr? est(dp)->errstr : _("hmm, no error indicator!");
2089 errstr1 = vstralloc("[",errstr,"]", NULL);
2090 qerrstr = quote_string(errstr1);
2093 g_fprintf(stderr, _("%s: FAILED %s %s %s 0 %s\n"),
2094 get_pname(), dp->host->hostname, qname, planner_timestamp, qerrstr);
2096 log_add(L_FAIL, _("%s %s %s 0 %s"), dp->host->hostname, qname,
2097 planner_timestamp, qerrstr);
2101 /* XXX - memory leak with *dp */
2106 * insert-sort by decreasing priority, then
2107 * by decreasing size within priority levels.
/*
 * schedule_order: comparison function passed to insert_disk() for schedq.
 *
 * Returns the difference of the dump priorities (b minus a) when they
 * differ.  Otherwise the compressed dump sizes are compared and -1 or 1 is
 * returned according to the sign of est(b)->dump_csize minus
 * est(a)->dump_csize; the off_t difference is mapped to an int this way
 * rather than being returned directly.
 */
2110 static int schedule_order(
2117 diff = est(b)->dump_priority - est(a)->dump_priority;
2118 if(diff != 0) return diff;
2120 ldiff = est(b)->dump_csize - est(a)->dump_csize;
2121 if(ldiff < (off_t)0) return -1; /* XXX - there has to be a better way to do this */
2122 if(ldiff > (off_t)0) return 1;
/*
 * pick_inclevel: choose the incremental level to run for a disk.
 *
 * The starting point is the last level of the disk.  A last level of 0 and
 * the DS_NOFULL strategy are both reported on stderr as leading to level 1.
 * NOTE(review): the return statements of those two cases are not part of
 * this listing - confirm against the full source.
 *
 * When the base level has no estimate, the next level up is returned if it
 * has a positive estimate.  Otherwise a bump is considered: bump_thresh()
 * supplies the threshold, and the bump is taken only when the size saved
 * (base_size - bump_size) is not below that threshold.  A bump is recorded
 * in the log with L_INFO.
 */
2127 static int pick_inclevel(
2130 int base_level, bump_level;
2131 off_t base_size, bump_size;
2135 base_level = est(dp)->last_level;
2137 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2138 if(base_level == 0) {
2139 g_fprintf(stderr,_(" picklev: last night 0, so tonight level 1\n"));
2143 /* if no-full option set, always do level 1 */
2144 if(dp->strategy == DS_NOFULL) {
2145 g_fprintf(stderr,_(" picklev: no-full set, so always level 1\n"));
2149 base_size = est_size(dp, base_level);
2151 /* if we didn't get an estimate, we can't do an inc */
2152 if(base_size == (off_t)-1) {
2153 base_size = est_size(dp, base_level+1);
2154 if(base_size > (off_t)0) /* FORCE_BUMP */
2155 return base_level+1;
2156 g_fprintf(stderr,_(" picklev: no estimate for level %d, so no incs\n"), base_level);
2160 thresh = bump_thresh(base_level, est_size(dp, 0), dp->bumppercent, dp->bumpsize, dp->bumpmult);
2163 _(" pick: size %lld level %d days %d (thresh %lldK, %d days)\n"),
2164 (long long)base_size, base_level, est(dp)->level_days,
2165 (long long)thresh, dp->bumpdays);
/* too few days at this level, or a size within the threshold */
2168 || est(dp)->level_days < dp->bumpdays
2169 || base_size <= thresh)
2172 bump_level = base_level + 1;
2173 bump_size = est_size(dp, bump_level);
/* without an estimate for the next level the base level is kept */
2175 if(bump_size == (off_t)-1) return base_level;
2177 g_fprintf(stderr, _(" pick: next size %lld... "),
2178 (long long)bump_size);
2180 if(base_size - bump_size < thresh) {
2181 g_fprintf(stderr, _("not bumped\n"));
2185 qname = quote_string(dp->name);
2186 g_fprintf(stderr, _("BUMPED\n"));
2187 log_add(L_INFO, _("Incremental of %s:%s bumped to level %d."),
2188 dp->host->hostname, qname, bump_level);
2198 ** ========================================================================
2201 ** We have two strategies here:
2205 ** If we are trying to fit too much on the tape something has to go. We
2206 ** try to delay totals until tomorrow by converting them into incrementals
2207 ** and, if that is not effective enough, dropping incrementals altogether.
2208 ** While we are searching for the guilty dump (the one that is really
2209 ** causing the schedule to be oversize) we have probably trampled on a lot of
2210 ** innocent dumps, so we maintain a "before image" list and use this to
2211 ** put back what we can.
2213 ** 2. Promote dumps.
2215 ** We try to keep the amount of tape used by total dumps the same each night.
2216 ** If there is some spare tape in this run we have a look to see if any of
2217 ** tonight's incrementals could be promoted to totals and leave us with a
2218 ** more balanced cycle.
2221 static void delay_one_dump(disk_t *dp, int delete, ...);
2222 static int promote_highest_priority_incremental(void);
2223 static int promote_hills(void);
2225 /* delay any dumps that will not fit */
/*
 * delay_dumps: make the schedule in schedq fit in tape_length.
 *
 * Works in the five numbered steps described by the comments below:
 *   1. dumps larger than the available tape space are delayed or skipped;
 *   2. full dumps are delayed, from the tail of schedq towards the head,
 *      while total_size exceeds tape_length (2.a spares forced fulls and
 *      counts them, 2.b delays them too if that was not enough);
 *   3. incremental dumps are skipped while the total is still too big;
 *   4. before-images saved on biq are reinstated, last one first, whenever
 *      the resulting total still fits;
 *   5. the messages of the remaining before-images are written to stderr
 *      and to the log.
 *
 * Every change goes through delay_one_dump(), which records the
 * before-image used by steps 4 and 5.
 */
2226 static void delay_dumps(void)
2233 off_t new_total; /* New total_size */
2234 char est_kb[20]; /* Text formatted dump size */
2235 int nb_forced_level_0;
2241 biq.head = biq.tail = NULL;
2244 ** 1. Delay dumps that are way oversize.
2246 ** Dumps larger that the size of the tapes we are using are just plain
2247 ** not going to fit no matter how many other dumps we drop. Delay
2248 ** oversize totals until tomorrow (by which time my owner will have
2249 ** resolved the problem!) and drop incrementals altogether. Naturally
2250 ** a large total might be delayed into a large incremental so these
2251 ** need to be checked for separately.
2254 for(dp = schedq.head; dp != NULL; dp = ndp) {
/* a disk with a positive tape_splitsize may use all conf_runtapes tapes */
2255 int avail_tapes = 1;
2256 if (dp->tape_splitsize > (off_t)0)
2257 avail_tapes = conf_runtapes;
2259 ndp = dp->next; /* remove_disk zaps this */
2261 full_size = est_tape_size(dp, 0);
2262 if (full_size > tapetype_get_length(tape) * (off_t)avail_tapes) {
2263 char *qname = quote_string(dp->name);
2264 if (conf_runtapes > 1 && dp->tape_splitsize == (off_t)0) {
2265 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"
2266 ", you could define a splitsize"),
2267 dp->host->hostname, qname,
2268 (long long)full_size);
2270 log_add(L_WARNING, _("disk %s:%s, full dump (%lldKB) will be larger than available tape space"),
2271 dp->host->hostname, qname,
2272 (long long)full_size);
2277 if (est(dp)->dump_csize == (off_t)-1 ||
2278 est(dp)->dump_csize <= tapetype_get_length(tape) * (off_t)avail_tapes) {
2282 /* Format dumpsize for messages */
2283 g_snprintf(est_kb, 20, "%lld KB,",
2284 (long long)est(dp)->dump_csize);
2286 if(est(dp)->dump_level == 0) {
2289 message = _("but cannot incremental dump skip-incr disk");
2291 else if(est(dp)->last_level < 0) {
2293 message = _("but cannot incremental dump new disk");
2295 else if(est(dp)->degr_level < 0) {
2297 message = _("but no incremental estimate");
2299 else if (est(dp)->degr_csize > tapetype_get_length(tape)) {
2301 message = _("incremental dump also larger than tape");
2305 message = _("full dump delayed");
2310 message = _("skipping incremental");
2312 delay_one_dump(dp, delete, _("dump larger than available tape space,"),
2313 est_kb, message, NULL);
2317 ** 2. Delay total dumps.
2319 ** Delay total dumps until tomorrow (or the day after!). We start with
2320 ** the lowest priority (most dispensable) and work forwards. We take
2321 ** care not to delay *all* the dumps since this could lead to a stale
2322 ** mate [for any one disk there are only three ways tomorrows dump will
2323 ** be smaller than todays: 1. we do a level 0 today so tomorows dump
2324 ** will be a level 1; 2. the disk gets more data so that it is bumped
2325 ** tomorrow (this can be a slow process); and, 3. the disk looses some
2326 ** data (when does that ever happen?)].
2329 nb_forced_level_0 = 0;
2331 for(dp = schedq.head; dp != NULL && preserve == NULL; dp = dp->next)
2332 if(est(dp)->dump_level == 0)
2335 /* 2.a. Do not delay forced full */
2336 for(dp = schedq.tail;
2337 dp != NULL && total_size > tape_length;
2341 if(est(dp)->dump_level != 0) continue;
2343 get_info(dp->host->hostname, dp->name, &info);
2344 if(info.command & FORCE_FULL) {
2345 nb_forced_level_0 += 1;
2350 if(dp != preserve) {
2352 /* Format dumpsize for messages */
2353 g_snprintf(est_kb, 20, "%lld KB,",
2354 (long long)est(dp)->dump_csize);
2358 message = _("but cannot incremental dump skip-incr disk");
2360 else if(est(dp)->last_level < 0) {
2362 message = _("but cannot incremental dump new disk");
2364 else if(est(dp)->degr_level < 0) {
2366 message = _("but no incremental estimate");
2370 message = _("full dump delayed");
2372 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2377 /* 2.b. Delay forced full if needed */
2378 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2379 for(dp = schedq.tail;
2380 dp != NULL && total_size > tape_length;
2384 if(est(dp)->dump_level == 0 && dp != preserve) {
2386 /* Format dumpsize for messages */
2387 g_snprintf(est_kb, 20, "%lld KB,",
2388 (long long)est(dp)->dump_csize);
2392 message = _("but cannot incremental dump skip-incr disk");
2394 else if(est(dp)->last_level < 0) {
2396 message = _("but cannot incremental dump new disk");
2398 else if(est(dp)->degr_level < 0) {
2400 message = _("but no incremental estimate");
2404 message = _("full dump delayed");
2406 delay_one_dump(dp, delete, _("dumps too big,"), est_kb,
2413 ** 3. Delay incremental dumps.
2415 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2416 ** at making things fit. Again, we start with the lowest priority (most
2417 ** dispensable) and work forwards.
2420 for(dp = schedq.tail;
2421 dp != NULL && total_size > tape_length;
2425 if(est(dp)->dump_level != 0) {
2427 /* Format dumpsize for messages */
2428 g_snprintf(est_kb, 20, "%lld KB,",
2429 (long long)est(dp)->dump_csize);
2431 delay_one_dump(dp, 1,
2432 _("dumps way too big,"),
2434 _("must skip incremental dumps"),
2440 ** 4. Reinstate delayed dumps.
2442 ** We might not have needed to stomp on all of the dumps we have just
2443 ** delayed above. Try to reinstate them all starting with the last one
2444 ** and working forwards. It is unlikely that the last one will fit back
2445 ** in but why complicate the code?
2448 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2449 int avail_tapes = 1;
2452 if(dp->tape_splitsize > (off_t)0)
2453 avail_tapes = conf_runtapes;
2456 new_total = total_size + (off_t)tt_blocksize_kb +
2457 bi->csize + (off_t)tape_mark;
2459 new_total = total_size - est(dp)->dump_csize + bi->csize;
2461 if((new_total <= tape_length) &&
2462 (bi->csize < (tapetype_get_length(tape) * (off_t)avail_tapes))) {
2464 total_size = new_total;
2466 if(bi->level == 0) {
2467 total_lev0 += (double) bi->csize;
2469 insert_disk(&schedq, dp, schedule_order);
2472 est(dp)->dump_level = bi->level;
2473 est(dp)->dump_nsize = bi->nsize;
2474 est(dp)->dump_csize = bi->csize;
2478 if(bi->next == NULL)
2479 biq.tail = bi->prev;
2481 (bi->next)->prev = bi->prev;
2482 if(bi->prev == NULL)
2483 biq.head = bi->next;
2485 (bi->prev)->next = bi->next;
2493 ** 5. Output messages about what we have done.
2495 ** We can't output messages while we are delaying dumps because we might
2496 ** reinstate them later. We remember all the messages and output them
2500 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2503 g_fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2504 log_add(L_FAIL, "%s", bi->errstr);
2508 g_fprintf(stderr, _(" delay: %s now at level %d\n"),
2509 bi->errstr, est(dp)->dump_level);
2510 log_add(L_INFO, "%s", bi->errstr);
2518 g_fprintf(stderr, _(" delay: Total size now %lld.\n"),
2519 (long long)total_size);
2526 * Remove a dump or modify it from full to incremental.
2527 * Keep track of it on the bi q in case we can add it back later.
/*
 * delay_one_dump: take one scheduled dump out of the running totals and
 * remember its previous state on the before-image queue biq.
 *
 * The size of the dump (plus tt_blocksize_kb and one tape mark) is
 * subtracted from total_size, and from total_lev0 for a level 0.  A new
 * bi_t appended at biq.tail records the delete flag and the current dump
 * level and sizes.  Its errstr is built from the host name,
 * planner_timestamp and the NULL-terminated list of message strings passed
 * as variable arguments.
 *
 * The disk is then either removed from schedq or switched to its degraded
 * level, in which case the degraded size is added back to total_size.
 * NOTE(review): the test that selects between the two is not part of this
 * listing; presumably it is the delete argument - confirm.
 */
2530 static void delay_one_dump,
2536 char level_str[NUM_STR_SIZE];
2539 char *qname = quote_string(dp->name);
2540 char *errstr, *qerrstr;
2542 arglist_start(argp, delete);
2544 total_size -= (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
2545 if(est(dp)->dump_level == 0) {
2546 total_lev0 -= (double) est(dp)->dump_csize;
/* append a new before-image at the tail of biq */
2549 bi = alloc(SIZEOF(bi_t));
2551 bi->prev = biq.tail;
2552 if(biq.tail == NULL)
2555 biq.tail->next = bi;
2558 bi->deleted = delete;
2560 bi->level = est(dp)->dump_level;
2561 bi->nsize = est(dp)->dump_nsize;
2562 bi->csize = est(dp)->dump_csize;
2564 g_snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_level);
2565 bi->errstr = vstralloc(dp->host->hostname,
2567 " ", planner_timestamp ? planner_timestamp : "?",
/* the variable arguments are message fragments, ended by a NULL pointer */
2572 while ((next = arglist_val(argp, char *)) != NULL) {
2573 vstrextend(&errstr, sep, next, NULL);
2576 strappend(errstr, "]");
2577 qerrstr = quote_string(errstr);
2578 vstrextend(&bi->errstr, " ", qerrstr, NULL);
2584 remove_disk(&schedq, dp);
2586 est(dp)->dump_level = est(dp)->degr_level;
2587 est(dp)->dump_nsize = est(dp)->degr_nsize;
2588 est(dp)->dump_csize = est(dp)->degr_csize;
2589 total_size += (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
/*
 * promote_highest_priority_incremental: try to turn one scheduled
 * incremental into a level 0.
 *
 * Every disk in schedq gets a promote score, starting at -1000.  A disk is
 * considered only if it has a positive level 0 estimate, is not already
 * due (next_level0 > 0) and its next_level0 does not exceed
 * dp->maxpromoteday.  It is rejected when the promotion would push
 * total_size over tape_length, or would exceed
 * balanced_size + balance_threshold while the same host already has a
 * level 0 today, or when it is the only disk of its host due on that day
 * and the host has no level 0 today.  The score is computed from
 * nb_same_day, the square of nb_today, conf_dumpcycle and next_level0,
 * and the best scoring disk is remembered in dp_promote.
 *
 * For the chosen disk the current dump level and sizes are saved in the
 * degr_* fields, the dump becomes a level 0, and total_size and total_lev0
 * are updated.  The promotion is reported on stderr and in the log.
 */
2596 static int promote_highest_priority_incremental(void)
2598 disk_t *dp, *dp1, *dp_promote;
2599 off_t new_size, new_total, new_lev0;
2601 int nb_today, nb_same_day, nb_today2;
2602 int nb_disk_today, nb_disk_same_day;
2606 * return 1 if did so; must update total_size correctly; must not
2607 * cause total_size to exceed tape_length
2611 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2613 est(dp)->promote = -1000;
2615 if(est_size(dp,0) <= (off_t)0)
2618 if(est(dp)->next_level0 <= 0)
2621 if(est(dp)->next_level0 > dp->maxpromoteday)
/* totals as they would be if this disk were promoted to level 0 */
2624 new_size = est_tape_size(dp, 0);
2625 new_total = total_size - est(dp)->dump_csize + new_size;
2626 new_lev0 = (off_t)total_lev0 + new_size;
2631 nb_disk_same_day = 0;
2632 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2633 if(est(dp1)->dump_level == 0)
2635 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2637 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2638 if(est(dp1)->dump_level == 0)
2640 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2645 /* do not promote if overflow tape */
2646 if(new_total > tape_length)
2649 /* do not promote if overflow balanced size and something today */
2650 /* promote if nothing today */
2651 if((new_lev0 > (off_t)(balanced_size + balance_threshold)) &&
2652 (nb_disk_today > 0))
2655 /* do not promote if only one disk due that day and nothing today */
2656 if(nb_disk_same_day == 1 && nb_disk_today == 0)
2659 nb_today2 = nb_today*nb_today;
2660 if(nb_today == 0 && nb_same_day > 1)
2663 if(nb_same_day >= nb_today2) {
2664 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2665 conf_dumpcycle - est(dp)->next_level0;
2668 est(dp)->promote = -nb_today2 +
2669 conf_dumpcycle - est(dp)->next_level0;
2672 qname = quote_string(dp->name);
2673 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2675 g_fprintf(stderr," try %s:%s %d %d %d = %d\n",
2676 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2679 g_fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2680 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2688 qname = quote_string(dp->name);
2689 new_size = est_tape_size(dp, 0);
2690 new_total = total_size - est(dp)->dump_csize + new_size;
2691 new_lev0 = (off_t)total_lev0 + new_size;
2693 total_size = new_total;
2694 total_lev0 = (double)new_lev0;
2695 check_days = est(dp)->next_level0;
/* keep the former incremental as the degraded-mode alternative */
2696 est(dp)->degr_level = est(dp)->dump_level;
2697 est(dp)->degr_nsize = est(dp)->dump_nsize;
2698 est(dp)->degr_csize = est(dp)->dump_csize;
2699 est(dp)->dump_level = 0;
2700 est(dp)->dump_nsize = est_size(dp, 0);
2701 est(dp)->dump_csize = new_size;
2702 est(dp)->next_level0 = 0;
2705 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2706 dp->host->hostname, qname,
2707 total_lev0, (long long)total_size);
2710 plural(_("Full dump of %s:%s promoted from %d day ahead."),
2711 _("Full dump of %s:%s promoted from %d days ahead."),
2713 dp->host->hostname, qname, check_days);
/*
 * promote_hills: look for a future day that carries an unusually large
 * amount of full-dump work (a "hill") and pull one of its disks forward
 * to a level 0 today.
 *
 * Steps shown below:
 *   1. Build a table `sp`, indexed by "days until next level 0", that
 *      accumulates last_lev0size for every disk in schedq that is
 *      allowed to receive full dumps (not skip_full, strategy neither
 *      DS_NOFULL nor DS_INCRONLY).
 *   2. Pick the day with the largest accumulated size among the days
 *      whose disk count is greater than one.
 *   3. Walk schedq for a disk due on that day whose level 0 still fits
 *      in tape_length; if one is found, switch it to level 0 and update
 *      total_size / total_lev0 and the disk's estimate record.
 *   4. If no disk on that day qualifies, zero the day's disk count so
 *      it is not selected again.
 *
 * Reads the globals conf_dumpcycle, schedq, tape_length; updates
 * total_size and total_lev0 when a promotion happens.
 *
 * NOTE(review): presumably returns nonzero when a disk was promoted and
 * 0 otherwise -- confirm against the return statements.
 */
2721 static int promote_hills(void)
2724 struct balance_stats {
2736 /* If we are already doing a level 0 don't bother */
2740 /* Do the guts of an "amadmin balance" */
2741 my_dumpcycle = conf_dumpcycle;
/* Cap the number of day buckets; presumably this bounds the size of the
 * table allocated just below -- TODO confirm the intent of 10000. */
2742 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
/* One balance_stats entry per day of the (capped) dump cycle. */
2744 sp = (struct balance_stats *)
2745 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
/* Reset every day's bucket before accumulating. */
2747 for(days = 0; days < my_dumpcycle; days++) {
2749 sp[days].size = (off_t)0;
/* Accumulate each eligible disk's last level 0 size into the bucket for
 * the day its next level 0 is due. */
2752 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2753 days = est(dp)->next_level0; /* This is > 0 by definition */
/* NOTE(review): only the upper bound of `days` is checked here before it
 * is used as an index into sp[]; a negative next_level0 would index
 * before the start of the table. Verify that callers guarantee
 * next_level0 >= 0 at this point. */
2754 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
2755 dp->strategy != DS_INCRONLY) {
2757 sp[days].size += est(dp)->last_lev0size;
2761 /* Search for a suitable big hill and cut it down */
2763 /* Find the tallest hill */
2764 hill_size = (off_t)0;
2765 for(days = 0; days < my_dumpcycle; days++) {
/* A day only counts as a hill when more than one disk is recorded for
 * it and its total size beats the best candidate so far. */
2766 if(sp[days].disks > 1 && sp[days].size > hill_size) {
2767 hill_size = sp[days].size;
2772 if(hill_size <= (off_t)0) break; /* no suitable hills */
2774 /* Find all the dumps in that hill and try and remove one */
2775 for(dp = schedq.head; dp != NULL; dp = dp->next) {
/* Conditions that rule a disk out: it is not due on the hill day, its
 * next level 0 lies beyond its maxpromoteday, or its strategy never
 * takes full dumps. */
2776 if(est(dp)->next_level0 != hill_days ||
2777 est(dp)->next_level0 > dp->maxpromoteday ||
2779 dp->strategy == DS_NOFULL ||
2780 dp->strategy == DS_INCRONLY)
/* Projected tape usage if the currently planned dump of this disk is
 * replaced by a level 0; it is compared against tape_length. */
2782 new_size = est_tape_size(dp, 0);
2783 new_total = total_size - est(dp)->dump_csize + new_size;
2784 if(new_total > tape_length)
2786 /* We found a disk we can promote */
2787 qname = quote_string(dp->name);
/* Commit the new totals. */
2788 total_size = new_total;
2789 total_lev0 += (double)new_size;
/* Save the previously planned level and sizes in the degr_* fields
 * before overwriting the dump_* fields with the level 0 values. */
2790 est(dp)->degr_level = est(dp)->dump_level;
2791 est(dp)->degr_nsize = est(dp)->dump_nsize;
2792 est(dp)->degr_csize = est(dp)->dump_csize;
2793 est(dp)->dump_level = 0;
2794 est(dp)->next_level0 = 0;
2795 est(dp)->dump_nsize = est_size(dp, 0);
2796 est(dp)->dump_csize = new_size;
2799 _(" promote: moving %s:%s up, total_lev0 %1.0lf, total_size %lld\n"),
2800 dp->host->hostname, qname,
2801 total_lev0, (long long)total_size);
2804 plural(_("Full dump of %s:%s specially promoted from %d day ahead."),
2805 _("Full dump of %s:%s specially promoted from %d days ahead."),
2807 dp->host->hostname, qname, hill_days);
2813 /* All the disks in that hill were unsuitable. */
2814 sp[hill_days].disks = 0; /* Don't get tricked again */
2822 * ========================================================================
2825 * XXX - memory leak - we shouldn't just throw away *dp
2827 static void output_scheduleline(
2831 time_t dump_time = 0, degr_time = 0;
2832 double dump_kps = 0, degr_kps = 0;
2833 char *schedline = NULL, *degr_str = NULL;
2834 char dump_priority_str[NUM_STR_SIZE];
2835 char dump_level_str[NUM_STR_SIZE];
2836 char dump_nsize_str[NUM_STR_SIZE];
2837 char dump_csize_str[NUM_STR_SIZE];
2838 char dump_time_str[NUM_STR_SIZE];
2839 char dump_kps_str[NUM_STR_SIZE];
2840 char degr_level_str[NUM_STR_SIZE];
2841 char degr_nsize_str[NUM_STR_SIZE];
2842 char degr_csize_str[NUM_STR_SIZE];
2843 char degr_time_str[NUM_STR_SIZE];
2844 char degr_kps_str[NUM_STR_SIZE];
2845 char *dump_date, *degr_date;
2848 char *qname = quote_string(dp->name);
2852 if(ep->dump_csize == (off_t)-1) {
2853 /* no estimate, fail the disk */
2855 _("%s: FAILED %s %s %s %d \"[no estimate]\"\n"),
2857 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2858 log_add(L_FAIL, _("%s %s %s %d [no estimate]"),
2859 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2864 dump_date = degr_date = (char *)0;
2865 for(i = 0; i < MAX_LEVELS; i++) {
2866 if(ep->dump_level == ep->level[i])
2867 dump_date = ep->dumpdate[i];
2868 if(ep->degr_level == ep->level[i])
2869 degr_date = ep->dumpdate[i];
2872 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
2874 if(ep->dump_level == 0) {
2875 dump_kps = fix_rate(ep->fullrate);
2876 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2878 if(ep->degr_csize != (off_t)-1) {
2879 degr_kps = fix_rate(ep->incrrate);
2880 degr_time = (time_t)((double)ep->degr_csize / degr_kps);
2884 dump_kps = fix_rate(ep->incrrate);
2885 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2888 if(ep->dump_level == 0 && ep->degr_csize != (off_t)-1) {
2889 g_snprintf(degr_level_str, sizeof(degr_level_str),
2890 "%d", ep->degr_level);
2891 g_snprintf(degr_nsize_str, sizeof(degr_nsize_str),
2892 "%lld", (long long)ep->degr_nsize);
2893 g_snprintf(degr_csize_str, sizeof(degr_csize_str),
2894 "%lld", (long long)ep->degr_csize);
2895 g_snprintf(degr_time_str, sizeof(degr_time_str),
2896 "%lld", (long long)degr_time);
2897 g_snprintf(degr_kps_str, sizeof(degr_kps_str),
2899 degr_str = vstralloc(" ", degr_level_str,
2901 " ", degr_nsize_str,
2902 " ", degr_csize_str,
2907 g_snprintf(dump_priority_str, SIZEOF(dump_priority_str),
2908 "%d", ep->dump_priority);
2909 g_snprintf(dump_level_str, SIZEOF(dump_level_str),
2910 "%d", ep->dump_level);
2911 g_snprintf(dump_nsize_str, sizeof(dump_nsize_str),
2912 "%lld", (long long)ep->dump_nsize);
2913 g_snprintf(dump_csize_str, sizeof(dump_csize_str),
2914 "%lld", (long long)ep->dump_csize);
2915 g_snprintf(dump_time_str, sizeof(dump_time_str),
2916 "%lld", (long long)dump_time);
2917 g_snprintf(dump_kps_str, sizeof(dump_kps_str),
2919 features = am_feature_to_string(dp->host->features);
2920 schedline = vstralloc("DUMP ",dp->host->hostname,
2923 " ", planner_timestamp,
2924 " ", dump_priority_str,
2925 " ", dump_level_str,
2927 " ", dump_nsize_str,
2928 " ", dump_csize_str,
2931 degr_str ? degr_str : "",
2934 fputs(schedline, stdout);
2935 fputs(schedline, stderr);