2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: planner.c,v 1.206 2006/08/10 23:57:27 paddy_s Exp $
29 * backup schedule planner for the Amanda backup system.
43 #include "amfeatures.h"
44 #include "server_util.h"
47 #define planner_debug(i,x) do { \
48 if ((i) <= debug_planner) { \
53 #define MAX_LEVELS 3 /* max# of estimates per filesys */
55 #define RUNS_REDZONE 5 /* should be in conf file? */
57 #define PROMOTE_THRESHOLD 0.05 /* if <5% unbalanced, don't promote */
58 #define DEFAULT_DUMPRATE 1024.0 /* K/s */
60 /* configuration file stuff */
63 off_t conf_maxdumpsize;
66 int conf_runspercycle;
71 int conf_usetimestamps;
73 #define HOST_READY ((void *)0) /* must be 0 */
74 #define HOST_ACTIVE ((void *)1)
75 #define HOST_DONE ((void *)2)
77 #define DISK_READY 0 /* must be 0 */
79 #define DISK_PARTIALY_DONE 2
82 typedef struct est_s {
87 off_t dump_nsize; /* native size */
88 off_t dump_csize; /* compressed size */
89 int degr_level; /* if dump_level == 0, what would be the inc level */
90 off_t degr_nsize; /* native degraded size */
91 off_t degr_csize; /* compressed degraded size */
97 double fullrate, incrrate;
98 double fullcomp, incrcomp;
100 int level[MAX_LEVELS];
101 char *dumpdate[MAX_LEVELS];
102 off_t est_size[MAX_LEVELS];
105 #define est(dp) ((est_t *)(dp)->up)
107 /* pestq = partial estimate */
108 disklist_t startq, waitq, pestq, estq, failq, schedq;
110 double total_lev0, balanced_size, balance_threshold;
116 size_t tt_blocksize_kb;
117 int runs_per_cycle = 0;
119 char *planner_timestamp = NULL;
121 static am_feature_t *our_features = NULL;
122 static char *our_feature_string = NULL;
124 /* We keep a LIFO queue of before images for all modifications made
125 * to schedq in our attempt to make the schedule fit on the tape.
126 * Enough information is stored to reinstate a dump if it turns out
127 * that it shouldn't have been touched after all.
129 typedef struct bi_s {
132 int deleted; /* 0=modified, 1=deleted */
133 disk_t *dp; /* The disk that was changed */
134 int level; /* The original level */
135 off_t nsize; /* The original native size */
136 off_t csize; /* The original compressed size */
137 char *errstr; /* A message describing why this disk is here */
140 typedef struct bilist_s {
144 bilist_t biq; /* The BI queue itself */
147 * ========================================================================
152 static void setup_estimate(disk_t *dp);
153 static void get_estimates(void);
154 static void analyze_estimate(disk_t *dp);
155 static void handle_failed(disk_t *dp);
156 static void delay_dumps(void);
157 static int promote_highest_priority_incremental(void);
158 static int promote_hills(void);
159 static void output_scheduleline(disk_t *dp);
160 int main(int, char **);
162 int main(int argc, char **argv)
167 unsigned long malloc_hist_1, malloc_size_1;
168 unsigned long malloc_hist_2, malloc_size_2;
175 times_t section_start;
178 int new_argc, my_argc;
179 char **new_argv, **my_argv;
185 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
187 parse_conf(argc, argv, &new_argc, &new_argv);
192 config_name = stralloc(my_argv[1]);
193 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
195 char my_cwd[STR_SIZE];
197 if (getcwd(my_cwd, SIZEOF(my_cwd)) == NULL) {
198 error("cannot determine current working directory");
201 config_dir = stralloc2(my_cwd, "/");
202 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
203 config_name = stralloc(config_name + 1);
209 set_pname("planner");
211 dbopen(DBG_SUBDIR_SERVER);
213 /* Don't die when child closes pipe */
214 signal(SIGPIPE, SIG_IGN);
216 malloc_size_1 = malloc_inuse(&malloc_hist_1);
218 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
219 set_logerror(logerror);
221 section_start = curclock();
223 our_features = am_init_feature_set();
224 our_feature_string = am_feature_to_string(our_features);
226 fprintf(stderr, "%s: pid %ld executable %s version %s\n",
227 get_pname(), (long) getpid(), my_argv[0], version());
228 for (i = 0; version_info[i] != NULL; i++)
229 fprintf(stderr, "%s: %s", get_pname(), version_info[i]);
232 * 1. Networking Setup
234 * Planner runs setuid to get a privileged socket for BSD security.
235 * We get the socket right away as root, then set euid to normal
236 * user. Keeping saved uid as root.
248 * From this point on we are running under our real uid, so we don't
249 * have to worry about opening security holes below. Make sure we
253 if(getpwuid(getuid()) == NULL) {
254 error("can't get login name for my uid %ld", (long)getuid());
259 * 2. Read in Configuration Information
261 * All the Amanda configuration files are loaded before we begin.
264 fprintf(stderr,"READING CONF FILES...\n");
266 conffile = stralloc2(config_dir, CONFFILE_NAME);
267 if(read_conffile(conffile)) {
268 error("errors processing config file \"%s\"", conffile);
273 dbrename(config_name, DBG_SUBDIR_SERVER);
275 report_bad_conf_arg();
277 conf_diskfile = getconf_str(CNF_DISKFILE);
278 if (*conf_diskfile == '/') {
279 conf_diskfile = stralloc(conf_diskfile);
281 conf_diskfile = stralloc2(config_dir, conf_diskfile);
283 if (read_diskfile(conf_diskfile, &origq) < 0) {
284 error("could not load disklist \"%s\"", conf_diskfile);
287 if(origq.head == NULL) {
288 error("empty disklist \"%s\"", conf_diskfile);
292 errstr = match_disklist(&origq, my_argc-2, my_argv+2);
294 fprintf(stderr,"%s",errstr);
298 for(dp = origq.head; dp != NULL; dp = dp->next) {
300 qname = quote_string(dp->name);
301 log_add(L_DISK, "%s %s", dp->host->hostname, qname);
308 error("no DLE to backup");
311 amfree(conf_diskfile);
313 conf_tapelist = getconf_str(CNF_TAPELIST);
314 if (*conf_tapelist == '/') {
315 conf_tapelist = stralloc(conf_tapelist);
317 conf_tapelist = stralloc2(config_dir, conf_tapelist);
319 if(read_tapelist(conf_tapelist)) {
320 error("could not load tapelist \"%s\"", conf_tapelist);
323 amfree(conf_tapelist);
325 conf_infofile = getconf_str(CNF_INFOFILE);
326 if (*conf_infofile == '/') {
327 conf_infofile = stralloc(conf_infofile);
329 conf_infofile = stralloc2(config_dir, conf_infofile);
331 if(open_infofile(conf_infofile)) {
332 error("could not open info db \"%s\"", conf_infofile);
335 if (check_infofile(conf_infofile, &origq, &errstr) == -1) {
336 log_add(L_WARNING, "problem copying infofile: %s", errstr);
339 amfree(conf_infofile);
341 conf_tapetype = getconf_str(CNF_TAPETYPE);
342 conf_maxdumpsize = getconf_am64(CNF_MAXDUMPSIZE);
343 conf_runtapes = getconf_int(CNF_RUNTAPES);
344 conf_dumpcycle = getconf_int(CNF_DUMPCYCLE);
345 conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE);
346 conf_tapecycle = getconf_int(CNF_TAPECYCLE);
347 conf_etimeout = (time_t)getconf_int(CNF_ETIMEOUT);
348 conf_reserve = getconf_int(CNF_RESERVE);
349 conf_autoflush = getconf_boolean(CNF_AUTOFLUSH);
350 conf_usetimestamps = getconf_boolean(CNF_USETIMESTAMPS);
352 amfree(planner_timestamp);
354 if(conf_usetimestamps == 0) {
355 planner_timestamp = construct_datestamp(NULL);
358 planner_timestamp = construct_timestamp(NULL);
360 log_add(L_START, "date %s", planner_timestamp);
361 printf("DATE %s\n", planner_timestamp);
363 fprintf(stderr, "%s: timestamp %s\n",
364 get_pname(), planner_timestamp);
366 /* some initializations */
368 if(conf_runspercycle == 0) {
369 runs_per_cycle = conf_dumpcycle;
370 } else if(conf_runspercycle == -1 ) {
371 runs_per_cycle = guess_runs_from_tapelist();
373 runs_per_cycle = conf_runspercycle;
375 if (runs_per_cycle <= 0) {
380 * do some basic sanity checking
382 if(conf_tapecycle <= runs_per_cycle) {
383 log_add(L_WARNING, "tapecycle (%d) <= runspercycle (%d)",
384 conf_tapecycle, runs_per_cycle);
387 tape = lookup_tapetype(conf_tapetype);
388 if(conf_maxdumpsize > (off_t)0) {
389 tape_length = (off_t)conf_maxdumpsize;
392 tape_length = tapetype_get_length(tape) * (off_t)conf_runtapes;
394 tape_mark = (size_t)tapetype_get_filemark(tape);
395 tt_blocksize_kb = (size_t)tapetype_get_blocksize(tape);
396 tt_blocksize = tt_blocksize_kb * 1024;
398 fprintf(stderr, "%s: time %s: startup took %s secs\n",
400 walltime_str(curclock()),
401 walltime_str(timessub(curclock(), section_start)));
404 * 3. Send autoflush dumps left on the holding disks
406 * This should give us something to do while we generate the new
410 fprintf(stderr,"\nSENDING FLUSHES...\n");
416 char *qdisk, *qhname;
417 holding_list = holding_get_files_for_flush(NULL, 0);
418 for(holding_file=holding_list->first; holding_file != NULL;
419 holding_file = holding_file->next) {
420 holding_file_get_dumpfile(holding_file->name, &file);
422 if (holding_file_size(holding_file->name, 1) <= 0) {
423 log_add(L_INFO, "%s: removing file with no data.",
425 holding_file_unlink(holding_file->name);
429 qdisk = quote_string(file.disk);
430 qhname = quote_string(holding_file->name);
431 log_add(L_DISK, "%s %s", file.name, qdisk);
433 "FLUSH %s %s %s %d %s\n",
440 "FLUSH %s %s %s %d %s\n",
449 free_sl(holding_list);
452 fprintf(stderr, "ENDFLUSH\n");
453 fprintf(stdout, "ENDFLUSH\n");
457 * 4. Calculate Preliminary Dump Levels
459 * Before we can get estimates from the remote slave hosts, we make a
460 * first attempt at guessing what dump levels we will be dumping at
461 * based on the curinfo database.
464 fprintf(stderr,"\nSETTING UP FOR ESTIMATES...\n");
465 section_start = curclock();
467 startq.head = startq.tail = NULL;
468 while(!empty(origq)) {
469 disk_t *dp = dequeue_disk(&origq);
475 fprintf(stderr, "%s: time %s: setting up estimates took %s secs\n",
477 walltime_str(curclock()),
478 walltime_str(timessub(curclock(), section_start)));
482 * 5. Get Dump Size Estimates from Remote Client Hosts
484 * Each host is queried (in parallel) for dump size information on all
485 * of its disks, and the results gathered as they come in.
488 /* go out and get the dump estimates */
490 fprintf(stderr,"\nGETTING ESTIMATES...\n");
491 section_start = curclock();
493 estq.head = estq.tail = NULL;
494 pestq.head = pestq.tail = NULL;
495 waitq.head = waitq.tail = NULL;
496 failq.head = failq.tail = NULL;
500 fprintf(stderr, "%s: time %s: getting estimates took %s secs\n",
502 walltime_str(curclock()),
503 walltime_str(timessub(curclock(), section_start)));
506 * At this point, all disks with estimates are in estq, and
507 * all the disks on hosts that didn't respond to our inquiry
511 dump_queue("FAILED", failq, 15, stderr);
512 dump_queue("DONE", estq, 15, stderr);
516 * 6. Analyze Dump Estimates
518 * Each disk's estimates are looked at to determine what level it
519 * should dump at, and to calculate the expected size and time taking
520 * historical dump rates and compression ratios into account. The
521 * total expected size is accumulated as well.
524 fprintf(stderr,"\nANALYZING ESTIMATES...\n");
525 section_start = curclock();
527 /* an empty tape still has a label and an endmark */
528 total_size = ((off_t)tt_blocksize_kb + (off_t)tape_mark) * (off_t)2;
532 schedq.head = schedq.tail = NULL;
533 while(!empty(estq)) analyze_estimate(dequeue_disk(&estq));
534 while(!empty(failq)) handle_failed(dequeue_disk(&failq));
537 * At this point, all the disks are on schedq sorted by priority.
538 * The total estimated size of the backups is in total_size.
544 fprintf(stderr, "INITIAL SCHEDULE (size " OFF_T_FMT "):\n",
545 (OFF_T_FMT_TYPE)total_size);
546 for(dp = schedq.head; dp != NULL; dp = dp->next) {
547 qname = quote_string(dp->name);
548 fprintf(stderr, " %s %s pri %d lev %d nsize " OFF_T_FMT " csize " OFF_T_FMT "\n",
549 dp->host->hostname, qname, est(dp)->dump_priority,
551 (OFF_T_FMT_TYPE)est(dp)->dump_nsize,
552 (OFF_T_FMT_TYPE)est(dp)->dump_csize);
559 * 7. Delay Dumps if Schedule Too Big
561 * If the generated schedule is too big to fit on the tape, we need to
562 * delay some full dumps to make room. Incrementals will be done
563 * instead (except for new or forced disks).
565 * In extreme cases, delaying all the full dumps is not even enough.
566 * If so, some low-priority incrementals will be skipped completely
567 * until the dumps fit on the tape.
570 fprintf(stderr, "\nDELAYING DUMPS IF NEEDED, total_size " OFF_T_FMT
571 ", tape length " OFF_T_FMT " mark " SIZE_T_FMT "\n",
572 (OFF_T_FMT_TYPE)total_size,
573 (OFF_T_FMT_TYPE)tape_length,
574 (SIZE_T_FMT_TYPE)tape_mark);
576 initial_size = total_size;
580 /* XXX - why bother checking this? */
581 if(empty(schedq) && total_size < initial_size) {
582 error("cannot fit anything on tape, bailing out");
588 * 8. Promote Dumps if Schedule Too Small
590 * Amanda attempts to balance the full dumps over the length of the
591 * dump cycle. If this night's full dumps are too small relative to
592 * the other nights, promote some high-priority full dumps that will be
593 * due for the next run, to full dumps for tonight, taking care not to
594 * overflow the tape size.
596 * This doesn't work too well for small sites. For these we scan ahead
597 * looking for nights that have an excessive number of dumps and promote
600 * Amanda never delays full dumps just for the sake of balancing the
601 * schedule, so it can take a full cycle to balance the schedule after
606 "\nPROMOTING DUMPS IF NEEDED, total_lev0 %1.0lf, balanced_size %1.0lf...\n",
607 total_lev0, balanced_size);
609 balance_threshold = balanced_size * PROMOTE_THRESHOLD;
611 while((balanced_size - total_lev0) > balance_threshold && moved_one)
612 moved_one = promote_highest_priority_incremental();
614 moved_one = promote_hills();
616 fprintf(stderr, "%s: time %s: analysis took %s secs\n",
618 walltime_str(curclock()),
619 walltime_str(timessub(curclock(), section_start)));
622 /* done with privileged ops, make sure root privilege is dropped */
623 if ( geteuid() == 0 ) {
631 * The schedule goes to stdout, presumably to driver. A copy is written
632 * on stderr for the debug file.
635 fprintf(stderr,"\nGENERATING SCHEDULE:\n--------\n");
637 while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq));
638 fprintf(stderr, "--------\n");
641 log_add(L_FINISH, "date %s time %s", planner_timestamp, walltime_str(curclock()));
644 free_new_argv(new_argc, new_argv);
645 free_server_config();
646 amfree(planner_timestamp);
649 amfree(our_feature_string);
650 am_release_feature_set(our_features);
653 malloc_size_2 = malloc_inuse(&malloc_hist_2);
655 if(malloc_size_1 != malloc_size_2) {
656 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
667 * ========================================================================
668 * SETUP FOR ESTIMATES
672 static void askfor(est_t *, int, int, info_t *);
673 static int last_level(info_t *info); /* subroutines */
674 static off_t est_size(disk_t *dp, int level);
675 static off_t est_tape_size(disk_t *dp, int level);
676 static int next_level0(disk_t *dp, info_t *info);
677 static int runs_at(info_t *info, int lev);
678 static off_t bump_thresh(int level, off_t size_level_0, int bumppercent, off_t bumpsize, double bumpmult);
679 static int when_overwrite(char *label);
682 est_t *ep, /* estimate data block */
683 int seq, /* sequence number of request */
684 int lev, /* dump level being requested */
685 info_t *info) /* info block for disk */
687 if(seq < 0 || seq >= MAX_LEVELS) {
688 error("error [planner askfor: seq out of range 0..%d: %d]",
692 if(lev < -1 || lev >= DUMP_LEVELS) {
693 error("error [planner askfor: lev out of range -1..%d: %d]",
700 ep->dumpdate[seq] = (char *)0;
701 ep->est_size[seq] = (off_t)-2;
705 ep->level[seq] = lev;
707 ep->dumpdate[seq] = stralloc(get_dumpdate(info,lev));
708 malloc_mark(ep->dumpdate[seq]);
710 ep->est_size[seq] = (off_t)-2;
725 assert(dp && dp->host);
727 qname = quote_string(dp->name);
728 fprintf(stderr, "%s: time %s: setting up estimates for %s:%s\n",
729 get_pname(), walltime_str(curclock()),
730 dp->host->hostname, qname);
732 /* get current information about disk */
734 if(get_info(dp->host->hostname, dp->name, &info)) {
735 /* no record for this disk, make a note of it */
736 log_add(L_INFO, "Adding new disk %s:%s.", dp->host->hostname, dp->name);
739 /* setup working data struct for disk */
741 ep = alloc(SIZEOF(est_t));
743 dp->up = (void *) ep;
744 ep->state = DISK_READY;
745 ep->dump_nsize = (off_t)-1;
746 ep->dump_csize = (off_t)-1;
747 ep->dump_priority = dp->priority;
751 /* calculated fields */
753 if (ISSET(info.command, FORCE_FULL)) {
754 /* force a level 0, kind of like a new disk */
755 if(dp->strategy == DS_NOFULL) {
757 * XXX - Not sure what it means to force a no-full disk. The
758 * purpose of no-full is to just dump changes relative to a
759 * stable base, for example root partitions that vary only
760 * slightly from a site-wide prototype. Only the variations
763 * If we allow a level 0 onto the Amanda cycle, then we are
764 * hosed when that tape gets re-used next. Disallow this for
768 "Cannot force full dump of %s:%s with no-full option.",
769 dp->host->hostname, qname);
771 /* clear force command */
772 CLR(info.command, FORCE_FULL);
773 if(put_info(dp->host->hostname, dp->name, &info)) {
774 error("could not put info record for %s:%s: %s",
775 dp->host->hostname, qname, strerror(errno));
778 ep->last_level = last_level(&info);
779 ep->next_level0 = next_level0(dp, &info);
783 ep->next_level0 = -conf_dumpcycle;
784 log_add(L_INFO, "Forcing full dump of %s:%s as directed.",
785 dp->host->hostname, qname);
788 else if(dp->strategy == DS_NOFULL) {
789 /* force estimate of level 1 */
791 ep->next_level0 = next_level0(dp, &info);
794 ep->last_level = last_level(&info);
795 ep->next_level0 = next_level0(dp, &info);
798 /* adjust priority levels */
800 /* warn if dump will be overwritten */
801 if (ep->last_level > -1 && strlen(info.inf[0].label) > 0) {
802 overwrite_runs = when_overwrite(info.inf[0].label);
803 if(overwrite_runs == 0) {
804 log_add(L_WARNING, "Last full dump of %s:%s "
805 "on tape %s overwritten on this run.",
806 dp->host->hostname, qname, info.inf[0].label);
808 else if(overwrite_runs <= RUNS_REDZONE) {
809 log_add(L_WARNING, "Last full dump of %s:%s on "
810 "tape %s overwritten in %d run%s.",
811 dp->host->hostname, qname, info.inf[0].label,
812 overwrite_runs, overwrite_runs == 1? "" : "s");
816 if(ep->next_level0 < 0) {
817 fprintf(stderr,"%s:%s overdue %d day%s for level 0\n",
818 dp->host->hostname, qname,
819 - ep->next_level0, ((- ep->next_level0) == 1) ? "" : "s");
820 ep->dump_priority -= ep->next_level0;
822 else if (ISSET(info.command, FORCE_FULL))
823 ep->dump_priority += 1;
824 /* else XXX bump up the priority of incrementals that failed last night */
826 /* handle external level 0 dumps */
828 if(dp->skip_full && dp->strategy != DS_NOINC) {
829 if(ep->next_level0 <= 0) {
830 /* update the date field */
831 info.inf[0].date = today;
832 CLR(info.command, FORCE_FULL);
833 ep->next_level0 += conf_dumpcycle;
835 if(put_info(dp->host->hostname, dp->name, &info)) {
836 error("could not put info record for %s:%s: %s",
837 dp->host->hostname, qname, strerror(errno));
840 log_add(L_INFO, "Skipping full dump of %s:%s today.",
841 dp->host->hostname, qname);
842 fprintf(stderr,"%s:%s lev 0 skipped due to skip-full flag\n",
843 dp->host->hostname, qname);
844 /* don't enqueue the disk */
845 askfor(ep, 0, -1, &info);
846 askfor(ep, 1, -1, &info);
847 askfor(ep, 2, -1, &info);
848 fprintf(stderr, "%s: SKIPPED %s %s 0 [skip-full]\n",
849 get_pname(), dp->host->hostname, qname);
850 log_add(L_SUCCESS, "%s %s %s 0 [skipped: skip-full]",
851 dp->host->hostname, qname, planner_timestamp);
856 if(ep->last_level == -1) {
857 /* probably a new disk, but skip-full means no full! */
861 if(ep->next_level0 == 1) {
862 log_add(L_WARNING, "Skipping full dump of %s:%s tomorrow.",
863 dp->host->hostname, qname);
867 if(dp->strategy == DS_INCRONLY && ep->last_level == -1 && !ISSET(info.command, FORCE_FULL)) {
868 /* don't enqueue the disk */
869 askfor(ep, 0, -1, &info);
870 askfor(ep, 1, -1, &info);
871 askfor(ep, 2, -1, &info);
872 log_add(L_FAIL, "%s %s 19000101 1 [Skipping incronly because no full dump were done]",
873 dp->host->hostname, qname);
874 fprintf(stderr,"%s:%s lev 1 skipped due to strategy incronly and no full dump were done\n",
875 dp->host->hostname, qname);
880 /* handle "skip-incr" type archives */
882 if(dp->skip_incr && ep->next_level0 > 0) {
883 fprintf(stderr,"%s:%s lev 1 skipped due to skip-incr flag\n",
884 dp->host->hostname, qname);
885 /* don't enqueue the disk */
886 askfor(ep, 0, -1, &info);
887 askfor(ep, 1, -1, &info);
888 askfor(ep, 2, -1, &info);
890 fprintf(stderr, "%s: SKIPPED %s %s 1 [skip-incr]\n",
891 get_pname(), dp->host->hostname, qname);
893 log_add(L_SUCCESS, "%s %s %s 1 [skipped: skip-incr]",
894 dp->host->hostname, qname, planner_timestamp);
899 if( ep->last_level == -1 && ep->next_level0 > 0 &&
900 dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY &&
901 conf_reserve == 100) {
902 log_add(L_WARNING, "%s:%s mismatch: no tapelist record, "
903 "but curinfo next_level0: %d.",
904 dp->host->hostname, qname, ep->next_level0);
908 if(ep->last_level == 0) ep->level_days = 0;
909 else ep->level_days = runs_at(&info, ep->last_level);
910 ep->last_lev0size = info.inf[0].csize;
912 ep->fullrate = perf_average(info.full.rate, 0.0);
913 ep->incrrate = perf_average(info.incr.rate, 0.0);
915 ep->fullcomp = perf_average(info.full.comp, dp->comprate[0]);
916 ep->incrcomp = perf_average(info.incr.comp, dp->comprate[1]);
918 /* determine which estimates to get */
922 if (dp->strategy == DS_NOINC ||
924 (!ISSET(info.command, FORCE_BUMP) ||
926 ep->last_level == -1))) {
927 if(info.command & FORCE_BUMP && ep->last_level == -1) {
929 "Remove force-bump command of %s:%s because it's a new disk.",
930 dp->host->hostname, qname);
932 switch (dp->strategy) {
935 askfor(ep, i++, 0, &info);
937 log_add(L_INFO, "Ignoring skip_full for %s:%s "
938 "because the strategy is NOINC.",
939 dp->host->hostname, qname);
941 if(info.command & FORCE_BUMP) {
943 "Ignoring FORCE_BUMP for %s:%s because the strategy is NOINC.",
944 dp->host->hostname, qname);
953 if (ISSET(info.command, FORCE_FULL))
954 askfor(ep, i++, 0, &info);
959 if(!dp->skip_incr && !(dp->strategy == DS_NOINC)) {
960 if(ep->last_level == -1) { /* a new disk */
961 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY) {
962 askfor(ep, i++, 1, &info);
964 assert(!dp->skip_full); /* should be handled above */
966 } else { /* not new, pick normally */
969 curr_level = ep->last_level;
971 if (ISSET(info.command, FORCE_NO_BUMP)) {
972 if(curr_level > 0) { /* level 0 already asked for */
973 askfor(ep, i++, curr_level, &info);
975 log_add(L_INFO,"Preventing bump of %s:%s as directed.",
976 dp->host->hostname, qname);
977 } else if (ISSET(info.command, FORCE_BUMP)
978 && curr_level + 1 < DUMP_LEVELS) {
979 askfor(ep, i++, curr_level+1, &info);
980 log_add(L_INFO,"Bumping of %s:%s at level %d as directed.",
981 dp->host->hostname, qname, curr_level+1);
982 } else if (curr_level == 0) {
983 askfor(ep, i++, 1, &info);
985 askfor(ep, i++, curr_level, &info);
987 * If last time we dumped less than the threshold, then this
988 * time we will too, OR the extra size will be charged to both
989 * curr_level and curr_level + 1, so we will never bump. Also,
990 * if we haven't been at this level 2 days, or the dump failed
991 * last night, we can't bump.
993 if((info.inf[curr_level].size == (off_t)0 || /* no data, try it anyway */
994 (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult)))
995 && ep->level_days >= dp->bumpdays))
996 && curr_level + 1 < DUMP_LEVELS) {
997 askfor(ep, i++, curr_level+1, &info);
1003 while(i < MAX_LEVELS) /* mark end of estimates */
1004 askfor(ep, i++, -1, &info);
1008 fprintf(stderr, "setup_estimate: %s:%s: command %u, options: %s "
1009 "last_level %d next_level0 %d level_days %d getting estimates "
1010 "%d (" OFF_T_FMT ") %d (" OFF_T_FMT ") %d (" OFF_T_FMT ")\n",
1011 dp->host->hostname, qname, info.command,
1012 dp->strategy == DS_NOFULL ? "no-full" :
1013 dp->strategy == DS_INCRONLY ? "incr-only" :
1014 dp->skip_full ? "skip-full" :
1015 dp->skip_incr ? "skip-incr" : "none",
1016 ep->last_level, ep->next_level0, ep->level_days,
1017 ep->level[0], (OFF_T_FMT_TYPE)ep->est_size[0],
1018 ep->level[1], (OFF_T_FMT_TYPE)ep->est_size[1],
1019 ep->level[2], (OFF_T_FMT_TYPE)ep->est_size[2]);
1021 assert(ep->level[0] != -1);
1022 enqueue_disk(&startq, dp);
1026 static int when_overwrite(
1032 runtapes = conf_runtapes;
1033 if(runtapes == 0) runtapes = 1;
1035 if((tp = lookup_tapelabel(label)) == NULL)
1036 return 1; /* "shouldn't happen", but trigger warning message */
1037 else if(tp->reuse == 0)
1039 else if(lookup_nb_tape() > conf_tapecycle)
1040 return (lookup_nb_tape() - tp->position) / runtapes;
1042 return (conf_tapecycle - tp->position) / runtapes;
1045 /* Return the estimated size for a particular dump */
1046 static off_t est_size(
1052 for(i = 0; i < MAX_LEVELS; i++) {
1053 if(level == est(dp)->level[i])
1054 return est(dp)->est_size[i];
1059 /* Return the estimated on-tape size of a particular dump */
1060 static off_t est_tape_size(
1067 size = est_size(dp, level);
1069 if(size == (off_t)-1) return size;
1071 if(dp->compress == COMP_NONE)
1074 if(level == 0) ratio = est(dp)->fullcomp;
1075 else ratio = est(dp)->incrcomp;
1078 * make sure over-inflated compression ratios don't throw off the
1079 * estimates, this is mostly for when you have a small dump getting
1080 * compressed which takes up a lot more disk/tape space relatively due
1081 * to the overhead of the compression. This is specifically for
1082 * Digital Unix vdump. This patch is courtesy of Rudolf Gabler
1083 * (RUG@USM.Uni-Muenchen.DE)
1086 if(ratio > 1.1) ratio = 1.1;
1088 size = (off_t)((double)size * ratio);
1091 * Ratio can be very small in some error situations, so make sure
1092 * size goes back greater than zero. It may not be right, but
1093 * indicates we did get an estimate.
1095 if(size <= (off_t)0) {
1103 /* what was the level of the last successful dump to tape? */
1104 static int last_level(
1107 int min_pos, min_level, i;
1108 time_t lev0_date, last_date;
1111 if(info->last_level != -1)
1112 return info->last_level;
1114 /* to keep compatibility with old infofile */
1115 min_pos = 1000000000;
1119 for(i = 0; i < 9; i++) {
1120 if(conf_reserve < 100) {
1121 if(i == 0) lev0_date = info->inf[0].date;
1122 else if(info->inf[i].date < lev0_date) continue;
1123 if(info->inf[i].date > last_date) {
1124 last_date = info->inf[i].date;
1129 if((tp = lookup_tapelabel(info->inf[i].label)) == NULL) continue;
1130 /* cull any entries from previous cycles */
1131 if(i == 0) lev0_date = info->inf[0].date;
1132 else if(info->inf[i].date < lev0_date) continue;
1134 if(tp->position < min_pos) {
1135 min_pos = tp->position;
1140 info->last_level = i;
1144 /* when is next level 0 due? 0 = today, 1 = tomorrow, etc*/
1150 if(dp->strategy == DS_NOFULL || dp->strategy == DS_INCRONLY)
1151 return 1; /* fake it */
1152 else if (dp->strategy == DS_NOINC)
1154 else if(info->inf[0].date < (time_t)0)
1155 return -days_diff(EPOCH, today); /* new disk */
1157 return dp->dumpcycle - days_diff(info->inf[0].date, today);
1160 /* how many runs at current level? */
1165 tape_t *cur_tape, *old_tape;
1168 last = last_level(info);
1169 if(lev != last) return 0;
1170 if(lev == 0) return 1;
1172 if(info->consecutive_runs != -1)
1173 return info->consecutive_runs;
1175 /* to keep compatibility with old infofile */
1176 cur_tape = lookup_tapelabel(info->inf[lev].label);
1177 old_tape = lookup_tapelabel(info->inf[lev-1].label);
1178 if(cur_tape == NULL || old_tape == NULL) return 0;
1180 if(conf_runtapes == 0)
1181 nb_runs = (old_tape->position - cur_tape->position) / 1;
1183 nb_runs = (old_tape->position - cur_tape->position) / conf_runtapes;
1184 info->consecutive_runs = nb_runs;
1190 static off_t bump_thresh(
1199 if ((bumppercent != 0) && (size_level_0 > (off_t)1024)) {
1200 bump = ((double)size_level_0 * (double)bumppercent) / 100.0;
1203 bump = (double)bumpsize;
1205 while(--level) bump = bump * bumpmult;
1213 * ========================================================================
1214 * GET REMOTE DUMP SIZE ESTIMATES
1218 static void getsize(am_host_t *hostp);
1219 static disk_t *lookup_hostdisk(am_host_t *hp, char *str);
1220 static void handle_result(void *datap, pkt_t *pkt, security_handle_t *sech);
1223 static void get_estimates(void)
1227 int something_started;
1229 something_started = 1;
1230 while(something_started) {
1231 something_started = 0;
1232 for(dp = startq.head; dp != NULL; dp = dp->next) {
1234 if(hostp->up == HOST_READY) {
1235 something_started = 1;
1239 * dp is no longer on startq, so dp->next is not valid
1240 * and we have to start all over.
1248 while(!empty(waitq)) {
1249 disk_t *dp = dequeue_disk(&waitq);
1250 est(dp)->errstr = "hmm, disk was stranded on waitq";
1251 enqueue_disk(&failq, dp);
1254 while(!empty(pestq)) {
1255 disk_t *dp = dequeue_disk(&pestq);
1256 char * qname = quote_string(dp->name);
1258 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1259 if(est(dp)->est_size[0] == (off_t)-1) {
1260 log_add(L_WARNING, "disk %s:%s, estimate of level %d failed.",
1261 dp->host->hostname, qname, est(dp)->level[0]);
1265 "disk %s:%s, estimate of level %d timed out.",
1266 dp->host->hostname, qname, est(dp)->level[0]);
1268 est(dp)->level[0] = -1;
1271 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1272 if(est(dp)->est_size[1] == (off_t)-1) {
1274 "disk %s:%s, estimate of level %d failed.",
1275 dp->host->hostname, qname, est(dp)->level[1]);
1279 "disk %s:%s, estimate of level %d timed out.",
1280 dp->host->hostname, qname, est(dp)->level[1]);
1282 est(dp)->level[1] = -1;
1285 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1286 if(est(dp)->est_size[2] == (off_t)-1) {
1288 "disk %s:%s, estimate of level %d failed.",
1289 dp->host->hostname, qname, est(dp)->level[2]);
1293 "disk %s:%s, estimate of level %d timed out.",
1294 dp->host->hostname, qname, est(dp)->level[2]);
1296 est(dp)->level[2] = -1;
1299 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1300 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1301 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1302 enqueue_disk(&estq, dp);
1305 est(dp)->errstr = vstralloc("disk ", qname,
1306 ", all estimate timed out", NULL);
1307 enqueue_disk(&failq, dp);
/*
 * getsize -- build and send one estimate request to a client host.
 *
 * NOTE(review): only part of this function is visible in this view, so
 * the summary is limited to what the visible statements show.
 *
 * When hostp->features is set, a "SERVICE sendsize" request is built.
 * Disks with todo set and state DISK_READY are considered:
 *   - ES_CLIENT / ES_CALCSIZE disks get a request line per requested
 *     level, are switched to DISK_ACTIVE and taken off startq;
 *   - ES_SERVER disks are sized locally from the get_info() record,
 *     marked DISK_DONE and moved from startq to estq.
 * When hostp->features is still NULL, a "SERVICE noop" request carrying
 * our feature string is built instead, with CNF_CTIMEOUT as timeout.
 * Finally the host is marked HOST_ACTIVE, DISK_ACTIVE disks are queued on
 * waitq, and the request is passed to protocol_sendreq() with
 * handle_result() as the callback and hostp as its datum.
 */
1313 static void getsize(
1316 char number[NUM_STR_SIZE], *req;
1319 time_t estimates, timeout;
1321 const security_driver_t *secdrv;
1327 assert(hostp->disks != NULL);
1329 if(hostp->up != HOST_READY) {
1334 * The first time through here we send a "noop" request. This will
1335 * return the feature list from the client if it supports that.
1336 * If it does not, handle_result() will set the feature list to an
1337 * empty structure. In either case, we do the disks on the second
1338 * (and subsequent) pass(es).
1340 if(hostp->features != NULL) { /* sendsize service */
/* Which request-option fields this client's feature set accepts. */
1344 int has_features = am_has_feature(hostp->features,
1345 fe_req_options_features);
1346 int has_hostname = am_has_feature(hostp->features,
1347 fe_req_options_hostname);
1348 int has_maxdumps = am_has_feature(hostp->features,
1349 fe_req_options_maxdumps);
1350 int has_config = am_has_feature(hostp->features,
1351 fe_req_options_config);
/* Each option is emitted as "name=value;" only when the client supports it. */
1353 snprintf(number, SIZEOF(number), "%d", hostp->maxdumps);
1354 req = vstralloc("SERVICE ", "sendsize", "\n",
1356 has_features ? "features=" : "",
1357 has_features ? our_feature_string : "",
1358 has_features ? ";" : "",
1359 has_maxdumps ? "maxdumps=" : "",
1360 has_maxdumps ? number : "",
1361 has_maxdumps ? ";" : "",
1362 has_hostname ? "hostname=" : "",
1363 has_hostname ? hostp->hostname : "",
1364 has_hostname ? ";" : "",
1365 has_config ? "config=" : "",
1366 has_config ? config_name : "",
1367 has_config ? ";" : "",
1370 req_len = strlen(req);
1371 req_len += 128; /* room for SECURITY ... */
1373 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1377 if(dp->todo == 0) continue;
1379 if(est(dp)->state != DISK_READY) continue;
1381 est(dp)->got_estimate = 0;
/* No level in slot 0: the disk is marked done here. */
1382 if(est(dp)->level[0] == -1) {
1383 est(dp)->state = DISK_DONE;
1387 qname = quote_string(dp->name);
1388 qdevice = quote_string(dp->device);
1389 if(dp->estimate == ES_CLIENT ||
1390 dp->estimate == ES_CALCSIZE) {
1393 for(i = 0; i < MAX_LEVELS; i++) {
1395 char *exclude1 = "";
1396 char *exclude2 = "";
1397 char *excludefree = NULL;
1398 char *include1 = "";
1399 char *include2 = "";
1400 char *includefree = NULL;
1401 char spindle[NUM_STR_SIZE];
1402 char level[NUM_STR_SIZE];
1403 int lev = est(dp)->level[i];
/* A level of -1 ends the list of requested levels. */
1405 if(lev == -1) break;
1407 snprintf(level, SIZEOF(level), "%d", lev);
1408 snprintf(spindle, SIZEOF(spindle), "%d", dp->spindle);
/* Clients with fe_sendsize_req_options receive the full option string. */
1409 if(am_has_feature(hostp->features,fe_sendsize_req_options)){
1410 exclude1 = " OPTIONS |";
1411 exclude2 = optionstr(dp, hostp->features, NULL);
1412 if ( exclude2 == NULL ) {
1413 error("problem with option string, check the dumptype definition.\n");
1415 excludefree = exclude2;
/*
 * NOTE(review): presumably the fallback for clients without OPTIONS
 * support -- an exclude/include file or list is passed only when it has
 * exactly one element.
 */
1419 if(dp->exclude_file &&
1420 dp->exclude_file->nb_element == 1) {
1421 exclude1 = " exclude-file=";
1423 quote_string(dp->exclude_file->first->name);
1424 excludefree = exclude2;
1426 else if(dp->exclude_list &&
1427 dp->exclude_list->nb_element == 1) {
1428 exclude1 = " exclude-list=";
1430 quote_string(dp->exclude_list->first->name);
1431 excludefree = exclude2;
1433 if(dp->include_file &&
1434 dp->include_file->nb_element == 1) {
1435 include1 = " include-file=";
1437 quote_string(dp->include_file->first->name);
1438 includefree = include2;
1440 else if(dp->include_list &&
1441 dp->include_list->nb_element == 1) {
1442 include1 = " include-list=";
1444 quote_string(dp->include_list->first->name);
1445 includefree = include2;
/* Downgrade CALCSIZE to CLIENT when the client lacks the feature. */
1449 if(dp->estimate == ES_CALCSIZE &&
1450 !am_has_feature(hostp->features, fe_calcsize_estimate)) {
1451 log_add(L_WARNING,"%s:%s does not support CALCSIZE for estimate, using CLIENT.\n",
1452 hostp->hostname, qname);
1453 dp->estimate = ES_CLIENT;
1455 if(dp->estimate == ES_CLIENT)
1458 calcsize = "CALCSIZE ";
1460 if(strcmp(dp->program,"DUMP") == 0 ||
1461 strcmp(dp->program,"GNUTAR") == 0) {
1464 backup_api = "BACKUP ";
1466 l = vstralloc(calcsize,
1470 " ", dp->device ? qdevice : "",
1472 " ", est(dp)->dumpdate[i],
1474 " ", exclude1, exclude2,
1475 ((includefree != NULL) ? " " : ""),
/* Release the per-level option strings. */
1482 amfree(includefree);
1483 amfree(excludefree);
1491 est(dp)->state = DISK_ACTIVE;
1492 remove_disk(&startq, dp);
1494 else if (dp->estimate == ES_SERVER) {
/* Server-side estimate: sizes come from the stored info record. */
1497 get_info(dp->host->hostname, dp->name, &info);
1498 for(i = 0; i < MAX_LEVELS; i++) {
1500 int lev = est(dp)->level[i];
1502 if(lev == -1) break;
1503 if(lev == 0) { /* use latest level 0, should do extrapolation */
1504 off_t est_size = (off_t)0;
/* Look through info.history for level 0 entries with a non-negative size. */
1507 for(j=NB_HISTORY-2;j>=0;j--) {
1508 if(info.history[j].level == 0) {
1509 if(info.history[j].size < (off_t)0) continue;
1510 est_size = info.history[j].size;
1515 est(dp)->est_size[i] = est_size;
1517 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1518 est(dp)->est_size[i] = info.inf[lev].size;
1521 est(dp)->est_size[i] = (off_t)1000000;
1524 else if(lev == est(dp)->last_level) {
1525 /* means of all X day at the same level */
1528 off_t est_size_day[NB_DAY];
1529 int nb_est_day[NB_DAY];
1530 for(j=0;j<NB_DAY;j++) {
1531 est_size_day[j]=(off_t)0;
1535 for(j=NB_HISTORY-2;j>=0;j--) {
1536 if(info.history[j].level <= 0) continue;
1537 if(info.history[j].size < (off_t)0) continue;
1538 if(info.history[j].level==info.history[j+1].level) {
1539 if(nb_day <NB_DAY-1) nb_day++;
1540 est_size_day[nb_day] += info.history[j].size;
1541 nb_est_day[nb_day]++;
1547 nb_day = info.consecutive_runs + 1;
1548 if(nb_day > NB_DAY-1) nb_day = NB_DAY-1;
/* Walk back to the nearest bucket that has at least one sample. */
1550 while(nb_day > 0 && nb_est_day[nb_day] == 0) nb_day--;
1552 if(nb_est_day[nb_day] > 0) {
1553 est(dp)->est_size[i] = est_size_day[nb_day] /
1554 (off_t)nb_est_day[nb_day];
1556 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1557 est(dp)->est_size[i] = info.inf[lev].size;
1560 est(dp)->est_size[i] = (off_t)10000;
1563 else if(lev == est(dp)->last_level + 1) {
1564 /* means of all first day at a new level */
1565 off_t est_size = (off_t)0;
1568 for(j=NB_HISTORY-2;j>=0;j--) {
1569 if(info.history[j].level <= 0) continue;
1570 if(info.history[j].size < (off_t)0) continue;
1571 if(info.history[j].level == info.history[j+1].level + 1 ) {
1572 est_size += info.history[j].size;
1577 est(dp)->est_size[i] = est_size / (off_t)nb_est;
1579 else if(info.inf[lev].size > (off_t)1000) { /* stats */
1580 est(dp)->est_size[i] = info.inf[lev].size;
1583 est(dp)->est_size[i] = (off_t)100000;
/* Log the locally computed sizes for all three level slots. */
1587 fprintf(stderr,"%s time %s: got result for host %s disk %s:",
1588 get_pname(), walltime_str(curclock()),
1589 dp->host->hostname, qname);
1590 fprintf(stderr," %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K\n",
1591 est(dp)->level[0], (OFF_T_FMT_TYPE)est(dp)->est_size[0],
1592 est(dp)->level[1], (OFF_T_FMT_TYPE)est(dp)->est_size[1],
1593 est(dp)->level[2], (OFF_T_FMT_TYPE)est(dp)->est_size[2]);
1594 est(dp)->state = DISK_DONE;
1595 remove_disk(&startq, dp);
1596 enqueue_disk(&estq, dp);
1602 if(estimates == 0) {
1604 hostp->up = HOST_DONE;
/*
 * A negative etimeout is used as an absolute value; otherwise it is
 * multiplied by the number of estimates requested.
 */
1608 if (conf_etimeout < 0) {
1609 timeout = - conf_etimeout;
1611 timeout = estimates * conf_etimeout;
1613 } else { /* noop service */
1614 req = vstralloc("SERVICE ", "noop", "\n",
1616 "features=", our_feature_string, ";",
1620 * We use ctimeout for the "noop" request because it should be
1621 * very fast and etimeout has other side effects.
1623 timeout = (time_t)getconf_int(CNF_CTIMEOUT);
/* The security driver name is taken from the host's first disk. */
1626 secdrv = security_getdriver(hostp->disks->security_driver);
1627 if (secdrv == NULL) {
1628 error("could not find security driver '%s' for host '%s'",
1629 hostp->disks->security_driver, hostp->hostname);
1632 hostp->up = HOST_ACTIVE;
1634 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
/* Disks awaiting a client reply go on waitq with any old error cleared. */
1638 if(est(dp)->state == DISK_ACTIVE) {
1639 est(dp)->errstr = NULL;
1640 enqueue_disk(&waitq, dp);
1644 protocol_sendreq(hostp->hostname, secdrv, amhost_get_security_conf,
1645 req, timeout, handle_result, hostp);
/*
 * lookup_hostdisk -- find a disk of host hp by name.
 *
 * Walks hp->disks along the hostnext links and returns the first disk
 * whose name compares equal (strcmp) to str.
 * NOTE(review): the no-match return is not visible in this view;
 * presumably NULL -- confirm.
 */
1649 static disk_t *lookup_hostdisk(
1650 /*@keep@*/ am_host_t *hp,
1655 for(dp = hp->disks; dp != NULL; dp = dp->hostnext)
1656 if(strcmp(str, dp->name) == 0) return dp;
/*
 * handle_result -- reply callback that getsize() passes to
 * protocol_sendreq().
 *
 * datap is the am_host_t the request was sent to.
 * NOTE(review): parts of this function are not visible in this view; the
 * summary covers the visible statements only.
 *
 *  - The host is set back to HOST_READY.
 *  - A P_NAK packet is parsed for "ERROR <text>"; the two "noop service
 *    is unknown" replies are not turned into an error message.
 *  - An "OPTIONS " line may carry "features=", which replaces
 *    hostp->features.
 *  - Other lines are parsed as <quoted disk> <level> followed by
 *    "SIZE <n>" or "ERROR <quoted message>".
 *  - Afterwards each DISK_ACTIVE / DISK_PARTIALY_DONE disk is taken off
 *    waitq / pestq and requeued according to the packet type: pestq for
 *    P_PREP, estq or failq for P_REP.
 *  - The error paths set errbuf, are meant to fail the remaining
 *    DISK_ACTIVE disks with it, and mark the host HOST_DONE.
 */
1662 static void handle_result(
1665 security_handle_t *sech)
1671 char *msg, msg_undo;
1672 char *remoterr, *errbuf = NULL;
1681 OFF_T_FMT_TYPE size_;
1683 hostp = (am_host_t *)datap;
1684 hostp->up = HOST_READY;
1687 errbuf = vstralloc("Request to ", hostp->hostname, " failed: ",
1688 security_geterror(sech), NULL);
1691 if (pkt->type == P_NAK) {
1693 if(strncmp_const_skip(s, "ERROR ", s, ch) == 0) {
1696 goto NAK_parse_failed;
1698 skip_whitespace(s, ch);
1699 if(ch == '\0') goto NAK_parse_failed;
1701 if((s = strchr(remoterr, '\n')) != NULL) {
1702 if(s == remoterr) goto NAK_parse_failed;
/* Anything other than the two "noop is unknown" replies is a real NAK error. */
1705 if (strcmp(remoterr, "unknown service: noop") != 0
1706 && strcmp(remoterr, "noop: invalid service") != 0) {
1707 errbuf = vstralloc(hostp->hostname, " NAK: ", remoterr, NULL);
1718 if(strncmp_const(line, "OPTIONS ") == 0) {
1719 t = strstr(line, "features=");
/* Accept "features=" only at a token boundary (after whitespace or ';'). */
1720 if(t != NULL && (isspace((int)t[-1]) || t[-1] == ';')) {
1721 t += SIZEOF("features=")-1;
1722 am_release_feature_set(hostp->features);
1723 if((hostp->features = am_string_to_feature(t)) == NULL) {
1724 errbuf = vstralloc(hostp->hostname,
1725 ": bad features value: ",
1732 skip_quoted_line(s, ch);
1737 if(strncmp_const_skip(t, "ERROR ", t, tch) == 0) {
1739 skip_whitespace(t, tch);
1745 * If the "error" is that the "noop" service is unknown, it
1746 * just means the client is "old" (does not support the servie).
1747 * We can ignore this.
1749 if(hostp->features == NULL
1750 && pkt->type == P_NAK
1751 && (strcmp(t - 1, "unknown service: noop") == 0
1752 || strcmp(t - 1, "noop: invalid service") == 0)) {
1753 skip_quoted_line(s, ch);
1756 errbuf = vstralloc(hostp->hostname,
1757 (pkt->type == P_NAK) ? "NAK " : "",
1766 skip_quoted_string(t, tch);
1768 disk = unquote_string(msg);
1770 skip_whitespace(t, tch);
1772 if (sscanf(t - 1, "%d", &level) != 1) {
1776 skip_integer(t, tch);
1777 skip_whitespace(t, tch);
/*
 * NOTE(review): the lookup is performed twice with identical arguments;
 * the second call looks redundant.
 */
1779 dp = lookup_hostdisk(hostp, disk);
1780 dp = lookup_hostdisk(hostp, disk);
1782 log_add(L_ERROR, "%s: invalid reply from sendsize: `%s'\n",
1783 hostp->hostname, line);
1788 if (strncmp_const(t-1,"SIZE ") == 0) {
1789 if (sscanf(t - 1, "SIZE " OFF_T_FMT ,
1790 (OFF_T_FMT_TYPE *)&size_) != 1) {
1794 } else if (strncmp_const(t-1,"ERROR ") == 0) {
1795 skip_non_whitespace(t, tch);
1796 skip_whitespace(t, tch);
1798 skip_quoted_string(t,tch);
/* Only a final reply (P_REP) records the client's error text on the disk. */
1801 if (pkt->type == P_REP) {
1802 est(dp)->errstr = unquote_string(msg);
/*
 * A non-negative size is stored in the slot whose level matches the
 * reply; a level that was never requested is treated as a bad message.
 */
1811 if (size > (off_t)-1) {
1812 for(i = 0; i < MAX_LEVELS; i++) {
1813 if(est(dp)->level[i] == level) {
1814 est(dp)->est_size[i] = size;
1818 if(i == MAX_LEVELS) {
1819 goto bad_msg; /* this est wasn't requested */
1821 est(dp)->got_estimate++;
1825 skip_quoted_line(s, ch);
/* No feature list arrived: fall back to the default feature set. */
1828 if(hostp->up == HOST_READY && hostp->features == NULL) {
1830 * The client does not support the features list, so give it an
1833 dbprintf(("%s: no feature set from host %s\n",
1834 debug_prefix_time(NULL), hostp->hostname));
1835 hostp->features = am_set_default_feature_set();
1838 security_close_connection(sech, hostp->hostname);
1840 /* XXX what about disks that only got some estimates... do we care? */
1841 /* XXX amanda 2.1 treated that case as a bad msg */
1843 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1844 if(dp->todo == 0) continue;
1845 if(est(dp)->state != DISK_ACTIVE &&
1846 est(dp)->state != DISK_PARTIALY_DONE) continue;
/* Take the disk off the queue it was waiting on. */
1848 if(est(dp)->state == DISK_ACTIVE) {
1849 remove_disk(&waitq, dp);
1851 else if(est(dp)->state == DISK_PARTIALY_DONE) {
1852 remove_disk(&pestq, dp);
1855 if(pkt->type == P_REP) {
1856 est(dp)->state = DISK_DONE;
1858 else if(pkt->type == P_PREP) {
1859 est(dp)->state = DISK_PARTIALY_DONE;
1862 if(est(dp)->level[0] == -1) continue; /* ignore this disk */
1865 qname = quote_string(dp->name);
1866 if(pkt->type == P_PREP) {
1867 fprintf(stderr,"%s: time %s: got partial result for host %s disk %s:",
1868 get_pname(), walltime_str(curclock()),
1869 dp->host->hostname, qname);
1870 fprintf(stderr," %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K\n",
1871 est(dp)->level[0], (OFF_T_FMT_TYPE)est(dp)->est_size[0],
1872 est(dp)->level[1], (OFF_T_FMT_TYPE)est(dp)->est_size[1],
1873 est(dp)->level[2], (OFF_T_FMT_TYPE)est(dp)->est_size[2]);
1874 enqueue_disk(&pestq, dp);
1876 else if(pkt->type == P_REP) {
1877 fprintf(stderr,"%s: time %s: got result for host %s disk %s:",
1878 get_pname(), walltime_str(curclock()),
1879 dp->host->hostname, qname);
1880 fprintf(stderr," %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K, %d -> " OFF_T_FMT "K\n",
1881 est(dp)->level[0], (OFF_T_FMT_TYPE)est(dp)->est_size[0],
1882 est(dp)->level[1], (OFF_T_FMT_TYPE)est(dp)->est_size[1],
1883 est(dp)->level[2], (OFF_T_FMT_TYPE)est(dp)->est_size[2]);
/*
 * At least one level has a positive size: levels whose size is negative
 * are logged as failed and disabled (-1), then the disk goes on estq.
 */
1884 if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > (off_t)0) ||
1885 (est(dp)->level[1] != -1 && est(dp)->est_size[1] > (off_t)0) ||
1886 (est(dp)->level[2] != -1 && est(dp)->est_size[2] > (off_t)0)) {
1888 if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < (off_t)0) {
1890 "disk %s:%s, estimate of level %d failed.",
1891 dp->host->hostname, qname, est(dp)->level[2]);
1892 est(dp)->level[2] = -1;
1894 if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < (off_t)0) {
1896 "disk %s:%s, estimate of level %d failed.",
1897 dp->host->hostname, qname,
1899 est(dp)->level[1] = -1;
1901 if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < (off_t)0) {
1903 "disk %s:%s, estimate of level %d failed.",
1904 dp->host->hostname, qname, est(dp)->level[0]);
1905 est(dp)->level[0] = -1;
1907 enqueue_disk(&estq, dp);
1910 enqueue_disk(&failq, dp);
1911 if(est(dp)->got_estimate) {
1912 est(dp)->errstr = vstralloc("disk ", qname,
1913 ", all estimate failed", NULL);
1917 "error result for host %s disk %s: missing estimate\n",
1918 dp->host->hostname, qname);
1919 if (est(dp)->errstr == NULL) {
1920 est(dp)->errstr = vstralloc("missing result for ",
1935 errbuf = stralloc2(hostp->hostname, " NAK: [NAK parse failed]");
1936 fprintf(stderr, "got strange nak from %s:\n----\n%s----\n\n",
1937 hostp->hostname, pkt->body);
1941 fprintf(stderr,"got a bad message, stopped at:\n");
1943 fprintf(stderr,"----\n%s----\n\n", line);
1944 errbuf = stralloc2("badly formatted response from ", hostp->hostname);
1949 for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) {
1950 if(est(dp)->state != DISK_ACTIVE) continue;
1951 qname = quote_string(dp->name);
/*
 * NOTE(review): the state is set to DISK_DONE immediately before it is
 * tested against DISK_ACTIVE, so as written the branch below (removal
 * from waitq, move to failq) cannot be entered -- confirm and fix.
 */
1952 est(dp)->state = DISK_DONE;
1953 if(est(dp)->state == DISK_ACTIVE) {
1954 est(dp)->state = DISK_DONE;
1955 remove_disk(&waitq, dp);
1956 enqueue_disk(&failq, dp);
1959 est(dp)->errstr = stralloc(errbuf);
1960 fprintf(stderr, "error result for host %s disk %s: %s\n",
1961 dp->host->hostname, qname, errbuf);
1967 * If there were no disks involved, make sure the error gets
1970 log_add(L_ERROR, "%s", errbuf);
1972 hostp->up = HOST_DONE;
1980 * ========================================================================
/* Forward declarations of static helpers defined further down in this file. */
1985 static int schedule_order(disk_t *a, disk_t *b); /* subroutines */
1986 static int pick_inclevel(disk_t *dp);
/*
 * analyze_estimate -- choose tonight's dump level for one disk and put
 * the disk on the schedule.
 *
 * NOTE(review): parts of this function are not visible in this view; the
 * summary covers the visible statements only.
 *
 *  - When a level 0 is due (next_level0 <= 0), or the last dump was a
 *    level 0 and FORCE_NO_BUMP is set, the level 0 sizes are used; if
 *    there is no level 0 estimate an incremental level is picked with
 *    pick_inclevel() instead.
 *  - In that branch the degraded-mode fields (degr_*) are filled from
 *    pick_inclevel() unless the disk is new (last_level == -1) or has
 *    skip_incr set.
 *  - Otherwise an incremental level is picked, falling back to
 *    last_level and then last_level + 1 when no estimate is available.
 *  - The disk is inserted into schedq using schedule_order(), and
 *    total_size, total_lev0 and balanced_size are updated.
 */
1988 static void analyze_estimate(
1994 char *qname = quote_string(dp->name);
1998 fprintf(stderr, "pondering %s:%s... ",
1999 dp->host->hostname, qname);
2000 fprintf(stderr, "next_level0 %d last_level %d ",
2001 ep->next_level0, ep->last_level);
2003 if(get_info(dp->host->hostname, dp->name, &info) == 0) {
/* Start with no degraded-mode alternative. */
2007 ep->degr_level = -1;
2008 ep->degr_nsize = (off_t)-1;
2009 ep->degr_csize = (off_t)-1;
2011 if(ep->next_level0 <= 0 || (have_info && ep->last_level == 0
2012 && (info.command & FORCE_NO_BUMP))) {
2013 if(ep->next_level0 <= 0) {
2014 fprintf(stderr,"(due for level 0) ");
2017 ep->dump_nsize = est_size(dp, 0);
2018 ep->dump_csize = est_tape_size(dp, 0);
2019 if(ep->dump_csize <= (off_t)0) {
2021 "(no estimate for level 0, picking an incr level)\n");
2022 ep->dump_level = pick_inclevel(dp);
2023 ep->dump_nsize = est_size(dp, ep->dump_level);
2024 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* No size at the picked level: try the next level up. */
2026 if(ep->dump_nsize == (off_t)-1) {
2027 ep->dump_level = ep->dump_level + 1;
2028 ep->dump_nsize = est_size(dp, ep->dump_level);
2029 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2033 total_lev0 += (double) ep->dump_csize;
2034 if(ep->last_level == -1 || dp->skip_incr) {
2035 fprintf(stderr,"(%s disk, can't switch to degraded mode)\n",
2036 dp->skip_incr? "skip-incr":"new");
2037 ep->degr_level = -1;
2038 ep->degr_nsize = (off_t)-1;
2039 ep->degr_csize = (off_t)-1;
2042 /* fill in degraded mode info */
2043 fprintf(stderr,"(picking inclevel for degraded mode)");
2044 ep->degr_level = pick_inclevel(dp);
2045 ep->degr_nsize = est_size(dp, ep->degr_level);
2046 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2047 if(ep->degr_csize == (off_t)-1) {
2048 ep->degr_level = ep->degr_level + 1;
2049 ep->degr_nsize = est_size(dp, ep->degr_level);
2050 ep->degr_csize = est_tape_size(dp, ep->degr_level);
2052 if(ep->degr_csize == (off_t)-1) {
2053 fprintf(stderr,"(no inc estimate)");
2054 ep->degr_level = -1;
2056 fprintf(stderr,"\n");
2061 fprintf(stderr,"(not due for a full dump, picking an incr level)\n");
2062 /* XXX - if this returns -1 may be we should force a total? */
2063 ep->dump_level = pick_inclevel(dp);
2064 ep->dump_nsize = est_size(dp, ep->dump_level);
2065 ep->dump_csize = est_tape_size(dp, ep->dump_level);
/* Fallbacks when the picked level has no estimate: last_level, then last_level + 1. */
2067 if(ep->dump_csize == (off_t)-1) {
2068 ep->dump_level = ep->last_level;
2069 ep->dump_nsize = est_size(dp, ep->dump_level);
2070 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2072 if(ep->dump_csize == (off_t)-1) {
2073 ep->dump_level = ep->last_level + 1;
2074 ep->dump_nsize = est_size(dp, ep->dump_level);
2075 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2077 if(ep->dump_csize == (off_t)-1) {
2079 ep->dump_nsize = est_size(dp, ep->dump_level);
2080 ep->dump_csize = est_tape_size(dp, ep->dump_level);
2084 fprintf(stderr," curr level %d nsize " OFF_T_FMT " csize " OFF_T_FMT " ",
2085 ep->dump_level, (OFF_T_FMT_TYPE)ep->dump_nsize,
2086 (OFF_T_FMT_TYPE)ep->dump_csize);
2088 insert_disk(&schedq, dp, schedule_order);
/* Each scheduled dump is charged its size plus tt_blocksize_kb and tape_mark. */
2090 total_size += (off_t)tt_blocksize_kb + ep->dump_csize + tape_mark;
2092 /* update the balanced size */
2093 if(!(dp->skip_full || dp->strategy == DS_NOFULL ||
2094 dp->strategy == DS_INCRONLY)) {
2097 lev0size = est_tape_size(dp, 0);
/* Without a level 0 estimate, use the recorded size of the last level 0. */
2098 if(lev0size == (off_t)-1) lev0size = ep->last_lev0size;
2100 balanced_size += (double)(lev0size / (off_t)runs_per_cycle);
2103 fprintf(stderr,"total size " OFF_T_FMT " total_lev0 %1.0lf balanced-lev0size %1.0lf\n",
2104 (OFF_T_FMT_TYPE)total_size, total_lev0, balanced_size);
/*
 * handle_failed -- report a disk for which no usable estimate exists.
 *
 * The failure is written to stderr as a "FAILED" line and to the log
 * with L_FAIL, using est(dp)->errstr or a placeholder text when none was
 * recorded.
 * NOTE(review): the visible code also contains a branch that, for a disk
 * with a previous dump level, logs "using historical data" and calls
 * analyze_estimate(); whether that branch is active cannot be determined
 * from this view.
 */
2108 static void handle_failed(
2112 char *qname = quote_string(dp->name);
2115 * From George Scott <George.Scott@cc.monash.edu.au>:
2117 * If a machine is down when the planner is run it guesses from historical
2118 * data what the size of tonights dump is likely to be and schedules a
2119 * dump anyway. The dumper then usually discovers that that machine is
2120 * still down and ends up with a half full tape. Unfortunately the
2121 * planner had to delay another dump because it thought that the tape was
2122 * full. The fix here is for the planner to ignore unavailable machines
2123 * rather than ignore the fact that they are unavailable.
2128 if(est(dp)->last_level != -1) {
2130 "Could not get estimate for %s:%s, using historical data.",
2131 dp->host->hostname, qname);
2132 analyze_estimate(dp);
/* Fall back to a placeholder when no error text was recorded. */
2138 errstr = est(dp)->errstr? est(dp)->errstr : "hmm, no error indicator!";
2140 fprintf(stderr,"errstr:%s:\n", errstr);
2141 fprintf(stderr, "%s: FAILED %s %s %s 0 [%s]\n",
2142 get_pname(), dp->host->hostname, qname, planner_timestamp, errstr);
2144 log_add(L_FAIL, "%s %s %s 0 [%s]", dp->host->hostname, qname,
2145 planner_timestamp, errstr);
2148 /* XXX - memory leak with *dp */
2153 * insert-sort by decreasing priority, then
2154 * by decreasing size within priority levels.
/*
 * schedule_order -- ordering callback passed to insert_disk() for schedq.
 *
 * Returns the difference of the dump priorities (b minus a) when they
 * differ.  Otherwise the sign of (b's dump_csize - a's dump_csize)
 * decides: -1 when negative, 1 when positive.
 * NOTE(review): the return for equal sizes is not visible in this view.
 */
2157 static int schedule_order(
2164 diff = est(b)->dump_priority - est(a)->dump_priority;
2165 if(diff != 0) return diff;
2167 ldiff = est(b)->dump_csize - est(a)->dump_csize;
2168 if(ldiff < (off_t)0) return -1; /* XXX - there has to be a better way to do this */
2169 if(ldiff > (off_t)0) return 1;
/*
 * pick_inclevel -- choose the incremental level for a disk.
 *
 * NOTE(review): several return statements are not visible in this view;
 * the summary follows the visible tests and messages.
 *
 *  - The base level is the disk's last_level.  A last level of 0 and the
 *    DS_NOFULL strategy are special-cased (per the messages: level 1).
 *  - Without an estimate for the base level, base_level + 1 is returned
 *    when that level has a positive estimate.
 *  - A bump threshold is computed with bump_thresh(); the tests on
 *    level_days < bumpdays and base_size <= thresh gate whether a bump
 *    is considered at all.
 *  - base_level is returned when the next level has no estimate; when
 *    the saving (base_size - bump_size) is below the threshold the
 *    level is reported as "not bumped"; otherwise the bump is logged.
 */
2174 static int pick_inclevel(
2177 int base_level, bump_level;
2178 off_t base_size, bump_size;
2182 base_level = est(dp)->last_level;
2184 /* if last night was level 0, do level 1 tonight, no ifs or buts */
2185 if(base_level == 0) {
2186 fprintf(stderr," picklev: last night 0, so tonight level 1\n");
2190 /* if no-full option set, always do level 1 */
2191 if(dp->strategy == DS_NOFULL) {
2192 fprintf(stderr," picklev: no-full set, so always level 1\n");
2196 base_size = est_size(dp, base_level);
2198 /* if we didn't get an estimate, we can't do an inc */
2199 if(base_size == (off_t)-1) {
2200 base_size = est_size(dp, base_level+1);
2201 if(base_size > (off_t)0) /* FORCE_BUMP */
2202 return base_level+1;
2203 fprintf(stderr," picklev: no estimate for level %d, so no incs\n", base_level);
/* Minimum saving required before moving up to the next level. */
2207 thresh = bump_thresh(base_level, est_size(dp, 0), dp->bumppercent, dp->bumpsize, dp->bumpmult);
2210 " pick: size " OFF_T_FMT " level %d days %d (thresh " OFF_T_FMT "K, %d days)\n",
2211 (OFF_T_FMT_TYPE)base_size, base_level, est(dp)->level_days,
2212 (OFF_T_FMT_TYPE)thresh, dp->bumpdays);
2215 || est(dp)->level_days < dp->bumpdays
2216 || base_size <= thresh)
2219 bump_level = base_level + 1;
2220 bump_size = est_size(dp, bump_level);
/* No estimate for the next level: stay at the base level. */
2222 if(bump_size == (off_t)-1) return base_level;
2224 fprintf(stderr, " pick: next size " OFF_T_FMT "... ",
2225 (OFF_T_FMT_TYPE)bump_size);
2227 if(base_size - bump_size < thresh) {
2228 fprintf(stderr, "not bumped\n");
2232 qname = quote_string(dp->name);
2233 fprintf(stderr, "BUMPED\n");
2234 log_add(L_INFO, "Incremental of %s:%s bumped to level %d.",
2235 dp->host->hostname, qname, bump_level);
2245 ** ========================================================================
2248 ** We have two strategies here:
2252 ** If we are trying to fit too much on the tape something has to go. We
2253 ** try to delay totals until tomorrow by converting them into incrementals
2254 ** and, if that is not effective enough, dropping incrementals altogether.
2255 ** While we are searching for the guilty dump (the one that is really
2256 ** causing the schedule to be oversize) we have probably trampled on a lot of
2257 ** innocent dumps, so we maintain a "before image" list and use this to
2258 ** put back what we can.
2260 ** 2. Promote dumps.
2262 ** We try to keep the amount of tape used by total dumps the same each night.
2263 ** If there is some spare tape in this run we have a look to see if any of
2264 ** tonight's incrementals could be promoted to totals and leave us with a
2265 ** more balanced cycle.
/* Forward declarations of static helpers defined further down in this file. */
2268 static void delay_one_dump(disk_t *dp, int delete, ...);
2269 static int promote_highest_priority_incremental(void);
2270 static int promote_hills(void);
2272 /* delay any dumps that will not fit */
/*
 * delay_dumps -- shrink the schedule until it fits the available tape.
 *
 * NOTE(review): parts of this function are not visible in this view.
 * It works through the numbered passes described by the comments in
 * the body:
 *   1. dumps larger than the available tape space are delayed or dropped
 *      via delay_one_dump();
 *   2. full dumps are delayed while total_size > tape_length, first
 *      leaving FORCE_FULL dumps alone (2.a), then including them (2.b);
 *   3. incremental dumps are dropped while total_size > tape_length;
 *   4. before-images on biq are reinstated where the result still fits;
 *   5. the remaining biq entries are reported to stderr and the log.
 */
2273 static void delay_dumps(void)
2280 off_t new_total; /* New total_size */
2281 char est_kb[20]; /* Text formatted dump size */
2282 int nb_forced_level_0;
2288 biq.head = biq.tail = NULL;
2291 ** 1. Delay dumps that are way oversize.
2293 ** Dumps larger that the size of the tapes we are using are just plain
2294 ** not going to fit no matter how many other dumps we drop. Delay
2295 ** oversize totals until tomorrow (by which time my owner will have
2296 ** resolved the problem!) and drop incrementals altogether. Naturally
2297 ** a large total might be delayed into a large incremental so these
2298 ** need to be checked for separately.
2301 for(dp = schedq.head; dp != NULL; dp = ndp) {
/* A disk with a split size may use all conf_runtapes tapes; otherwise one. */
2302 int avail_tapes = 1;
2303 if (dp->tape_splitsize > (off_t)0)
2304 avail_tapes = conf_runtapes;
2306 ndp = dp->next; /* remove_disk zaps this */
2308 full_size = est_tape_size(dp, 0);
2309 if (full_size > tapetype_get_length(tape) * (off_t)avail_tapes) {
2310 char *qname = quote_string(dp->name);
2311 if (conf_runtapes > 1 && dp->tape_splitsize == (off_t)0) {
2312 log_add(L_WARNING, "disk %s:%s, full dump (" OFF_T_FMT
2313 "KB) will be larger than available tape space"
2314 ", you could define a splitsize",
2315 dp->host->hostname, qname,
2316 (OFF_T_FMT_TYPE)full_size);
2318 log_add(L_WARNING, "disk %s:%s, full dump (" OFF_T_FMT
2319 "KB) will be larger than available tape space",
2320 dp->host->hostname, qname,
2321 (OFF_T_FMT_TYPE)full_size);
/* Tonight's dump has no size or fits the tape space (presumably skipped -- confirm). */
2326 if (est(dp)->dump_csize == (off_t)-1 ||
2327 est(dp)->dump_csize <= tapetype_get_length(tape) * (off_t)avail_tapes) {
2331 /* Format dumpsize for messages */
2332 snprintf(est_kb, 20, OFF_T_FMT " KB,",
2333 (OFF_T_FMT_TYPE)est(dp)->dump_csize);
2335 if(est(dp)->dump_level == 0) {
2338 message = "but cannot incremental dump skip-incr disk";
2340 else if(est(dp)->last_level < 0) {
2342 message = "but cannot incremental dump new disk";
2344 else if(est(dp)->degr_level < 0) {
2346 message = "but no incremental estimate";
2348 else if (est(dp)->degr_csize > tapetype_get_length(tape)) {
2350 message = "incremental dump also larger than tape";
2354 message = "full dump delayed";
2359 message = "skipping incremental";
2361 delay_one_dump(dp, delete, "dump larger than available tape space,",
2362 est_kb, message, NULL);
2366 ** 2. Delay total dumps.
2368 ** Delay total dumps until tomorrow (or the day after!). We start with
2369 ** the lowest priority (most dispensable) and work forwards. We take
2370 ** care not to delay *all* the dumps since this could lead to a stale
2371 ** mate [for any one disk there are only three ways tomorrows dump will
2372 ** be smaller than todays: 1. we do a level 0 today so tomorows dump
2373 ** will be a level 1; 2. the disk gets more data so that it is bumped
2374 ** tomorrow (this can be a slow process); and, 3. the disk looses some
2375 ** data (when does that ever happen?)].
2378 nb_forced_level_0 = 0;
/*
 * Look for the first level 0 dump in schedq; the loop stops once
 * preserve is set (presumably to that disk, so it is never delayed).
 */
2380 for(dp = schedq.head; dp != NULL && preserve == NULL; dp = dp->next)
2381 if(est(dp)->dump_level == 0)
2384 /* 2.a. Do not delay forced full */
2385 for(dp = schedq.tail;
2386 dp != NULL && total_size > tape_length;
2390 if(est(dp)->dump_level != 0) continue;
/* Forced fulls are counted here and handled in pass 2.b. */
2392 get_info(dp->host->hostname, dp->name, &info);
2393 if(info.command & FORCE_FULL) {
2394 nb_forced_level_0 += 1;
2399 if(dp != preserve) {
2401 /* Format dumpsize for messages */
2402 snprintf(est_kb, 20, OFF_T_FMT " KB,",
2403 (OFF_T_FMT_TYPE)est(dp)->dump_csize);
2407 message = "but cannot incremental dump skip-incr disk";
2409 else if(est(dp)->last_level < 0) {
2411 message = "but cannot incremental dump new disk";
2413 else if(est(dp)->degr_level < 0) {
2415 message = "but no incremental estimate";
2419 message = "full dump delayed";
2421 delay_one_dump(dp, delete, "dumps too big,", est_kb,
2426 /* 2.b. Delay forced full if needed */
2427 if(nb_forced_level_0 > 0 && total_size > tape_length) {
2428 for(dp = schedq.tail;
2429 dp != NULL && total_size > tape_length;
2433 if(est(dp)->dump_level == 0 && dp != preserve) {
2435 /* Format dumpsize for messages */
2436 snprintf(est_kb, 20, OFF_T_FMT " KB,",
2437 (OFF_T_FMT_TYPE)est(dp)->dump_csize);
2441 message = "but cannot incremental dump skip-incr disk";
2443 else if(est(dp)->last_level < 0) {
2445 message = "but cannot incremental dump new disk";
2447 else if(est(dp)->degr_level < 0) {
2449 message = "but no incremental estimate";
2453 message = "full dump delayed";
2455 delay_one_dump(dp, delete, "dumps too big,", est_kb,
2462 ** 3. Delay incremental dumps.
2464 ** Delay incremental dumps until tomorrow. This is a last ditch attempt
2465 ** at making things fit. Again, we start with the lowest priority (most
2466 ** dispensable) and work forwards.
2469 for(dp = schedq.tail;
2470 dp != NULL && total_size > tape_length;
2474 if(est(dp)->dump_level != 0) {
2476 /* Format dumpsize for messages */
2477 snprintf(est_kb, 20, OFF_T_FMT " KB,",
2478 (OFF_T_FMT_TYPE)est(dp)->dump_csize);
2480 delay_one_dump(dp, 1,
2481 "dumps way too big,",
2483 "must skip incremental dumps",
2489 ** 4. Reinstate delayed dumps.
2491 ** We might not have needed to stomp on all of the dumps we have just
2492 ** delayed above. Try to reinstate them all starting with the last one
2493 ** and working forwards. It is unlikely that the last one will fit back
2494 ** in but why complicate the code?
2497 /*@i@*/ for(bi = biq.tail; bi != NULL; bi = nbi) {
2498 int avail_tapes = 1;
2501 if(dp->tape_splitsize > (off_t)0)
2502 avail_tapes = conf_runtapes;
/* Size the schedule would have if this before-image were restored. */
2505 new_total = total_size + (off_t)tt_blocksize_kb +
2506 bi->csize + (off_t)tape_mark;
2508 new_total = total_size - est(dp)->dump_csize + bi->csize;
2510 if((new_total <= tape_length) &&
2511 (bi->csize < (tapetype_get_length(tape) * (off_t)avail_tapes))) {
2513 total_size = new_total;
2515 if(bi->level == 0) {
2516 total_lev0 += (double) bi->csize;
2518 insert_disk(&schedq, dp, schedule_order);
2521 est(dp)->dump_level = bi->level;
2522 est(dp)->dump_nsize = bi->nsize;
2523 est(dp)->dump_csize = bi->csize;
/* Unlink bi from the doubly linked biq list. */
2527 if(bi->next == NULL)
2528 biq.tail = bi->prev;
2530 (bi->next)->prev = bi->prev;
2531 if(bi->prev == NULL)
2532 biq.head = bi->next;
2534 (bi->prev)->next = bi->next;
2542 ** 5. Output messages about what we have done.
2544 ** We can't output messages while we are delaying dumps because we might
2545 ** reinstate them later. We remember all the messages and output them
2549 /*@i@*/ for(bi = biq.head; bi != NULL; bi = nbi) {
2552 fprintf(stderr, "%s: FAILED %s\n", get_pname(), bi->errstr);
2553 log_add(L_FAIL, "%s", bi->errstr);
2557 fprintf(stderr, " delay: %s now at level %d\n",
2558 bi->errstr, est(dp)->dump_level);
2559 log_add(L_INFO, "%s", bi->errstr);
2567 fprintf(stderr, " delay: Total size now " OFF_T_FMT ".\n",
2568 (OFF_T_FMT_TYPE)total_size);
2575 * Remove a dump or modify it from full to incremental.
2576 * Keep track of it on the bi q in case we can add it back later.
/*
 * delay_one_dump -- take one dump out of the schedule, or degrade it,
 * and remember its previous values on biq.
 *
 * dp      disk whose dump is delayed.
 * delete  stored in bi->deleted (presumably non-zero means the disk is
 *         removed from schedq instead of being degraded -- confirm).
 * ...     char * message pieces, terminated by NULL, appended to
 *         bi->errstr.
 *
 * Visible effects: the dump is subtracted from total_size (and from
 * total_lev0 when it is a level 0); a bi_t holding the old level, nsize
 * and csize is linked at the tail of biq; then the disk is either
 * removed from schedq or switched to its degr_* values, whose size is
 * added back to total_size.
 */
2579 static void delay_one_dump,
2585 char level_str[NUM_STR_SIZE];
2588 char *qname = quote_string(dp->name);
2590 arglist_start(argp, delete);
/* Take the current dump out of the running totals. */
2592 total_size -= (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
2593 if(est(dp)->dump_level == 0) {
2594 total_lev0 -= (double) est(dp)->dump_csize;
2597 bi = alloc(SIZEOF(bi_t));
2599 bi->prev = biq.tail;
2600 if(biq.tail == NULL)
2603 biq.tail->next = bi;
2606 bi->deleted = delete;
2608 bi->level = est(dp)->dump_level;
2609 bi->nsize = est(dp)->dump_nsize;
2610 bi->csize = est(dp)->dump_csize;
2612 snprintf(level_str, SIZEOF(level_str), "%d", est(dp)->dump_level);
2613 bi->errstr = vstralloc(dp->host->hostname,
2615 " ", planner_timestamp ? planner_timestamp : "?",
/* Append each remaining message piece up to the NULL terminator. */
2619 while ((next = arglist_val(argp, char *)) != NULL) {
2620 bi->errstr = newvstralloc(bi->errstr, bi->errstr, sep, next, NULL);
2623 strappend(bi->errstr, "]");
2627 remove_disk(&schedq, dp);
/* Degrade: replace tonight's dump with the degraded-mode alternative. */
2629 est(dp)->dump_level = est(dp)->degr_level;
2630 est(dp)->dump_nsize = est(dp)->degr_nsize;
2631 est(dp)->dump_csize = est(dp)->degr_csize;
2632 total_size += (off_t)tt_blocksize_kb + est(dp)->dump_csize + (off_t)tape_mark;
/*
 * promote_highest_priority_incremental -- promote one scheduled
 * incremental dump to a level 0.
 *
 * NOTE(review): parts of this function are not visible in this view.
 * Each disk on schedq is examined; tests on the level 0 estimate,
 * next_level0, maxpromoteday, tape_length and the balanced size gate
 * whether it is a candidate, and candidates get a score in
 * est(dp)->promote.  The disk with the highest score is tracked in
 * dp_promote.  For the promoted disk total_size and total_lev0 are
 * updated, the previously planned dump is kept in the degr_* fields,
 * and the dump becomes a level 0 with next_level0 = 0.  The existing
 * comment in the body states the contract: return 1 if a promotion
 * was done, without letting total_size exceed tape_length.
 */
2639 static int promote_highest_priority_incremental(void)
2641 disk_t *dp, *dp1, *dp_promote;
2642 off_t new_size, new_total, new_lev0;
2644 int nb_today, nb_same_day, nb_today2;
2645 int nb_disk_today, nb_disk_same_day;
2649 * return 1 if did so; must update total_size correctly; must not
2650 * cause total_size to exceed tape_length
2654 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2656 est(dp)->promote = -1000;
2658 if(est_size(dp,0) <= (off_t)0)
2661 if(est(dp)->next_level0 <= 0)
2664 if(est(dp)->next_level0 > dp->maxpromoteday)
2667 new_size = est_tape_size(dp, 0);
2668 new_total = total_size - est(dp)->dump_csize + new_size;
2669 new_lev0 = (off_t)total_lev0 + new_size;
2674 nb_disk_same_day = 0;
/*
 * NOTE(review): presumably counts tonight's level 0 dumps and the dumps
 * due on the same day as dp, overall and for dp's host; the increments
 * are not visible here.
 */
2675 for(dp1 = schedq.head; dp1 != NULL; dp1 = dp1->next) {
2676 if(est(dp1)->dump_level == 0)
2678 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2680 if(strcmp(dp->host->hostname, dp1->host->hostname) == 0) {
2681 if(est(dp1)->dump_level == 0)
2683 else if(est(dp1)->next_level0 == est(dp)->next_level0)
2688 /* do not promote if overflow tape */
2689 if(new_total > tape_length)
2692 /* do not promote if overflow balanced size and something today */
2693 /* promote if nothing today */
2694 if((new_lev0 > (off_t)(balanced_size + balance_threshold)) &&
2695 (nb_disk_today > 0))
2698 /* do not promote if only one disk due that day and nothing today */
2699 if(nb_disk_same_day == 1 && nb_disk_today == 0)
2702 nb_today2 = nb_today*nb_today;
2703 if(nb_today == 0 && nb_same_day > 1)
2706 if(nb_same_day >= nb_today2) {
2707 est(dp)->promote = ((nb_same_day - nb_today2)*(nb_same_day - nb_today2)) +
2708 conf_dumpcycle - est(dp)->next_level0;
2711 est(dp)->promote = -nb_today2 +
2712 conf_dumpcycle - est(dp)->next_level0;
2715 qname = quote_string(dp->name);
2716 if(!dp_promote || est(dp_promote)->promote < est(dp)->promote) {
2718 fprintf(stderr," try %s:%s %d %d %d = %d\n",
2719 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2722 fprintf(stderr,"no try %s:%s %d %d %d = %d\n",
2723 dp->host->hostname, qname, nb_same_day, nb_today, est(dp)->next_level0, est(dp)->promote);
2731 qname = quote_string(dp->name);
2732 new_size = est_tape_size(dp, 0);
2733 new_total = total_size - est(dp)->dump_csize + new_size;
2734 new_lev0 = (off_t)total_lev0 + new_size;
2736 total_size = new_total;
2737 total_lev0 = (double)new_lev0;
2738 check_days = est(dp)->next_level0;
/* Keep the dump that was planned as the degraded-mode alternative. */
2739 est(dp)->degr_level = est(dp)->dump_level;
2740 est(dp)->degr_nsize = est(dp)->dump_nsize;
2741 est(dp)->degr_csize = est(dp)->dump_csize;
2742 est(dp)->dump_level = 0;
2743 est(dp)->dump_nsize = est_size(dp, 0);
2744 est(dp)->dump_csize = new_size;
2745 est(dp)->next_level0 = 0;
2748 " promote: moving %s:%s up, total_lev0 %1.0lf, total_size " OFF_T_FMT "\n",
2749 dp->host->hostname, qname,
2750 total_lev0, (OFF_T_FMT_TYPE)total_size);
2753 "Full dump of %s:%s promoted from %d day%s ahead.",
2754 dp->host->hostname, qname,
2755 check_days, (check_days == 1) ? "" : "s");
/*
 * promote_hills: build a per-day table (struct balance_stats) indexed by
 * est(dp)->next_level0, find the day whose accumulated last_lev0size is
 * largest among days holding more than one disk (a "hill"), and switch a
 * disk from that day to level 0 after testing the projected total against
 * tape_length.  Takes no arguments; works on the file-level schedule state
 * (schedq, total_size, total_lev0, tape_length, conf_dumpcycle).
 */
2763 static int promote_hills(void)
2766 struct balance_stats {
2778 /* If we are already doing a level 0 don't bother */
2782 /* Do the guts of an "amadmin balance" */
/* The table length follows conf_dumpcycle but is capped at 10000 entries. */
2783 my_dumpcycle = conf_dumpcycle;
2784 if(my_dumpcycle > 10000) my_dumpcycle = 10000;
/* One balance_stats entry per day of the (capped) dump cycle. */
2786 sp = (struct balance_stats *)
2787 alloc(SIZEOF(struct balance_stats) * my_dumpcycle);
/* Reset each day's entry before accumulating into it. */
2789 for(days = 0; days < my_dumpcycle; days++) {
2791 sp[days].size = (off_t)0;
/*
 * Add each queued disk's last level 0 size to the entry for the day its
 * next level 0 is due.  Disks whose day falls outside the table, that have
 * skip_full set, or that use the DS_NOFULL / DS_INCRONLY strategies are
 * left out.
 */
2794 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2795 days = est(dp)->next_level0; /* This is > 0 by definition */
2796 if(days<my_dumpcycle && !dp->skip_full && dp->strategy != DS_NOFULL &&
2797 dp->strategy != DS_INCRONLY) {
2799 sp[days].size += est(dp)->last_lev0size;
2803 /* Search for a suitable big hill and cut it down */
2805 /* Find the tallest hill */
2806 hill_size = (off_t)0;
/* Only days holding more than one disk qualify as a hill. */
2807 for(days = 0; days < my_dumpcycle; days++) {
2808 if(sp[days].disks > 1 && sp[days].size > hill_size) {
2809 hill_size = sp[days].size;
2814 if(hill_size <= (off_t)0) break; /* no suitable hills */
2816 /* Find all the dumps in that hill and try and remove one */
/*
 * Candidate filter: the disk must be due on hill_days, within its
 * maxpromoteday limit, and must not use DS_NOFULL or DS_INCRONLY.
 */
2817 for(dp = schedq.head; dp != NULL; dp = dp->next) {
2818 if(est(dp)->next_level0 != hill_days ||
2819 est(dp)->next_level0 > dp->maxpromoteday ||
2821 dp->strategy == DS_NOFULL ||
2822 dp->strategy == DS_INCRONLY)
/*
 * Projected tape total if this disk ran at level 0: its current dump_csize
 * is swapped for the level 0 tape size, then compared with tape_length.
 */
2824 new_size = est_tape_size(dp, 0);
2825 new_total = total_size - est(dp)->dump_csize + new_size;
2826 if(new_total > tape_length)
2828 /* We found a disk we can promote */
2829 qname = quote_string(dp->name);
/* Commit the new totals. */
2830 total_size = new_total;
2831 total_lev0 += (double)new_size;
/*
 * Copy the current dump_* values into the degr_* fields before the dump_*
 * fields are overwritten with the level 0 values.
 */
2832 est(dp)->degr_level = est(dp)->dump_level;
2833 est(dp)->degr_nsize = est(dp)->dump_nsize;
2834 est(dp)->degr_csize = est(dp)->dump_csize;
2835 est(dp)->dump_level = 0;
2836 est(dp)->next_level0 = 0;
2837 est(dp)->dump_nsize = est_size(dp, 0);
2838 est(dp)->dump_csize = new_size;
2841 " promote: moving %s:%s up, total_lev0 %1.0lf, total_size " OFF_T_FMT "\n",
2842 dp->host->hostname, qname,
2843 total_lev0, (OFF_T_FMT_TYPE)total_size);
2846 "Full dump of %s:%s specially promoted from %d day%s ahead.",
2847 dp->host->hostname, qname,
2848 hill_days, (hill_days == 1) ? "" : "s");
2854 /* All the disks in that hill were unsuitable. */
/* Zeroing the disk count makes this day fail the "disks > 1" hill test. */
2855 sp[hill_days].disks = 0; /* Don't get tricked again */
2863 * ========================================================================
2866 * XXX - memory leak - we shouldn't just throw away *dp
2868 static void output_scheduleline(
2872 time_t dump_time = 0, degr_time = 0;
2873 double dump_kps = 0, degr_kps = 0;
2874 char *schedline = NULL, *degr_str = NULL;
2875 char dump_priority_str[NUM_STR_SIZE];
2876 char dump_level_str[NUM_STR_SIZE];
2877 char dump_nsize_str[NUM_STR_SIZE];
2878 char dump_csize_str[NUM_STR_SIZE];
2879 char dump_time_str[NUM_STR_SIZE];
2880 char dump_kps_str[NUM_STR_SIZE];
2881 char degr_level_str[NUM_STR_SIZE];
2882 char degr_nsize_str[NUM_STR_SIZE];
2883 char degr_csize_str[NUM_STR_SIZE];
2884 char degr_time_str[NUM_STR_SIZE];
2885 char degr_kps_str[NUM_STR_SIZE];
2886 char *dump_date, *degr_date;
2889 char *qname = quote_string(dp->name);
2893 if(ep->dump_csize == (off_t)-1) {
2894 /* no estimate, fail the disk */
2896 "%s: FAILED %s %s %s %d [no estimate]\n",
2898 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2899 log_add(L_FAIL, "%s %s %s %d [no estimate]",
2900 dp->host->hostname, qname, planner_timestamp, ep->dump_level);
2905 dump_date = degr_date = (char *)0;
2906 for(i = 0; i < MAX_LEVELS; i++) {
2907 if(ep->dump_level == ep->level[i])
2908 dump_date = ep->dumpdate[i];
2909 if(ep->degr_level == ep->level[i])
2910 degr_date = ep->dumpdate[i];
2913 #define fix_rate(rate) (rate < 1.0 ? DEFAULT_DUMPRATE : rate)
2915 if(ep->dump_level == 0) {
2916 dump_kps = fix_rate(ep->fullrate);
2917 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2919 if(ep->degr_csize != (off_t)-1) {
2920 degr_kps = fix_rate(ep->incrrate);
2921 degr_time = (time_t)((double)ep->degr_csize / degr_kps);
2925 dump_kps = fix_rate(ep->incrrate);
2926 dump_time = (time_t)((double)ep->dump_csize / dump_kps);
2929 if(ep->dump_level == 0 && ep->degr_csize != (off_t)-1) {
2930 snprintf(degr_level_str, sizeof(degr_level_str),
2931 "%d", ep->degr_level);
2932 snprintf(degr_nsize_str, sizeof(degr_nsize_str),
2933 OFF_T_FMT, (OFF_T_FMT_TYPE)ep->degr_nsize);
2934 snprintf(degr_csize_str, sizeof(degr_csize_str),
2935 OFF_T_FMT, (OFF_T_FMT_TYPE)ep->degr_csize);
2936 snprintf(degr_time_str, sizeof(degr_time_str),
2937 OFF_T_FMT, (OFF_T_FMT_TYPE)degr_time);
2938 snprintf(degr_kps_str, sizeof(degr_kps_str),
2940 degr_str = vstralloc(" ", degr_level_str,
2942 " ", degr_nsize_str,
2943 " ", degr_csize_str,
2948 snprintf(dump_priority_str, SIZEOF(dump_priority_str),
2949 "%d", ep->dump_priority);
2950 snprintf(dump_level_str, SIZEOF(dump_level_str),
2951 "%d", ep->dump_level);
2952 snprintf(dump_nsize_str, sizeof(dump_nsize_str),
2953 OFF_T_FMT, (OFF_T_FMT_TYPE)ep->dump_nsize);
2954 snprintf(dump_csize_str, sizeof(dump_csize_str),
2955 OFF_T_FMT, (OFF_T_FMT_TYPE)ep->dump_csize);
2956 snprintf(dump_time_str, sizeof(dump_time_str),
2957 OFF_T_FMT, (OFF_T_FMT_TYPE)dump_time);
2958 snprintf(dump_kps_str, sizeof(dump_kps_str),
2960 features = am_feature_to_string(dp->host->features);
2961 schedline = vstralloc("DUMP ",dp->host->hostname,
2964 " ", planner_timestamp,
2965 " ", dump_priority_str,
2966 " ", dump_level_str,
2968 " ", dump_nsize_str,
2969 " ", dump_csize_str,
2972 degr_str ? degr_str : "",
2975 fputs(schedline, stdout);
2976 fputs(schedline, stderr);