X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=server-src%2Fplanner.c;h=9aec032e7d20902f6dc2d15bcdfe3790f2612fdf;hb=c9bb64dd8260123d77b6f7be0f051f450b193e66;hp=82522b77c379cd0f3e8d7bc4ad0711bd95e130fa;hpb=9e3f8b799b5cfbbd61ce528e5a73794fc1bb8000;p=debian%2Famanda diff --git a/server-src/planner.c b/server-src/planner.c index 82522b7..9aec032 100644 --- a/server-src/planner.c +++ b/server-src/planner.c @@ -24,7 +24,7 @@ * file named AUTHORS, in the root directory of this distribution. */ /* - * $Id: planner.c,v 1.76.2.15.2.13.2.35 2004/05/10 16:43:49 martinea Exp $ + * $Id: planner.c,v 1.76.2.15.2.13.2.32.2.16 2005/03/16 18:09:50 martinea Exp $ * * backup schedule planner for the Amanda backup system. */ @@ -58,12 +58,9 @@ int conf_runtapes; int conf_dumpcycle; int conf_runspercycle; int conf_tapecycle; -int conf_bumpdays; -int conf_bumpsize; int conf_etimeout; int conf_reserve; int conf_autoflush; -double conf_bumpmult; #define HOST_READY ((void *)0) /* must be 0 */ #define HOST_ACTIVE ((void *)1) @@ -71,7 +68,8 @@ double conf_bumpmult; #define DISK_READY 0 /* must be 0 */ #define DISK_ACTIVE 1 -#define DISK_DONE 2 +#define DISK_PARTIALY_DONE 2 +#define DISK_DONE 3 typedef struct est_s { int state; @@ -96,7 +94,8 @@ typedef struct est_s { #define est(dp) ((est_t *)(dp)->up) -disklist_t startq, waitq, estq, failq, schedq; +/* pestq = partial estimate */ +disklist_t startq, waitq, pestq, estq, failq, schedq; long total_size; double total_lev0, balanced_size, balance_threshold; unsigned long tape_length, tape_mark; @@ -183,6 +182,8 @@ char **argv; close(fd); } + setvbuf(stderr, (char *)NULL, _IOLBF, 0); + if (argc > 1) { config_name = stralloc(argv[1]); config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL); @@ -305,9 +306,6 @@ char **argv; conf_dumpcycle = getconf_int(CNF_DUMPCYCLE); conf_runspercycle = getconf_int(CNF_RUNSPERCYCLE); conf_tapecycle = getconf_int(CNF_TAPECYCLE); - conf_bumpdays = getconf_int(CNF_BUMPDAYS); - conf_bumpsize = getconf_int(CNF_BUMPSIZE); - conf_bumpmult = getconf_real(CNF_BUMPMULT); conf_etimeout = getconf_int(CNF_ETIMEOUT); conf_reserve = getconf_int(CNF_RESERVE); conf_autoflush = getconf_int(CNF_AUTOFLUSH); @@ -361,7 +359,47 @@ char **argv; walltime_str(timessub(curclock(), section_start))); /* - * 3. Calculate Preliminary Dump Levels + * 3. Send autoflush dumps left on the holding disks + * + * This should give us something to do while we generate the new + * dump schedule. + */ + + fprintf(stderr,"\nSENDING FLUSHES...\n"); + if(conf_autoflush) { + dumpfile_t file; + sl_t *holding_list; + sle_t *holding_file; + holding_list = get_flush(NULL, NULL, 0, 0); + for(holding_file=holding_list->first; holding_file != NULL; + holding_file = holding_file->next) { + get_dumpfile(holding_file->name, &file); + + log_add(L_DISK, "%s %s", file.name, file.disk); + fprintf(stderr, + "FLUSH %s %s %s %d %s\n", + file.name, + file.disk, + file.datestamp, + file.dumplevel, + holding_file->name); + fprintf(stdout, + "FLUSH %s %s %s %d %s\n", + file.name, + file.disk, + file.datestamp, + file.dumplevel, + holding_file->name); + } + free_sl(holding_list); + holding_list = NULL; + } + fprintf(stderr, "ENDFLUSH\n"); + fprintf(stdout, "ENDFLUSH\n"); + fflush(stdout); + + /* + * 4. Calculate Preliminary Dump Levels * * Before we can get estimates from the remote slave hosts, we make a * first attempt at guessing what dump levels we will be dumping at @@ -386,7 +424,7 @@ char **argv; /* - * 4. Get Dump Size Estimates from Remote Client Hosts + * 5. Get Dump Size Estimates from Remote Client Hosts * * Each host is queried (in parallel) for dump size information on all * of its disks, and the results gathered as they come in. @@ -398,6 +436,8 @@ char **argv; section_start = curclock(); estq.head = estq.tail = NULL; + pestq.head = pestq.tail = NULL; + waitq.head = waitq.tail = NULL; failq.head = failq.tail = NULL; get_estimates(); @@ -418,7 +458,7 @@ char **argv; /* - * 5. Analyze Dump Estimates + * 6. Analyze Dump Estimates * * Each disk's estimates are looked at to determine what level it * should dump at, and to calculate the expected size and time taking @@ -456,7 +496,7 @@ char **argv; /* - * 6. Delay Dumps if Schedule Too Big + * 7. Delay Dumps if Schedule Too Big * * If the generated schedule is too big to fit on the tape, we need to * delay some full dumps to make room. Incrementals will be done @@ -481,7 +521,7 @@ char **argv; /* - * 7. Promote Dumps if Schedule Too Small + * 8. Promote Dumps if Schedule Too Small * * Amanda attempts to balance the full dumps over the length of the * dump cycle. If this night's full dumps are too small relative to @@ -516,7 +556,7 @@ char **argv; /* - * 8. Output Schedule + * 9. Output Schedule * * The schedule goes to stdout, presumably to driver. A copy is written * on stderr for the debug file. @@ -524,43 +564,11 @@ char **argv; fprintf(stderr,"\nGENERATING SCHEDULE:\n--------\n"); - if(conf_autoflush) { - dumpfile_t file; - sl_t *holding_list; - sle_t *holding_file; - holding_list = get_flush(NULL, datestamp, 0, 0); - for(holding_file=holding_list->first; holding_file != NULL; - holding_file = holding_file->next) { - get_dumpfile(holding_file->name, &file); - - log_add(L_DISK, "%s %s", file.name, file.disk); - fprintf(stderr, - "FLUSH %s %s %s %d %s\n", - file.name, - file.disk, - file.datestamp, - file.dumplevel, - holding_file->name); - fprintf(stdout, - "FLUSH %s %s %s %d %s\n", - file.name, - file.disk, - file.datestamp, - file.dumplevel, - holding_file->name); - } - free_sl(holding_list); - holding_list = NULL; - } - fprintf(stderr, "ENDFLUSH\n"); - fprintf(stdout, "ENDFLUSH\n"); - fflush(stdout); - while(!empty(schedq)) output_scheduleline(dequeue_disk(&schedq)); fprintf(stderr, "--------\n"); close_infofile(); - log_add(L_FINISH, "date %s", datestamp); + log_add(L_FINISH, "date %s time %s", datestamp, walltime_str(curclock())); amfree(msg); amfree(datestamp); @@ -592,7 +600,7 @@ static long est_size P((disk_t *dp, int level)); static long est_tape_size P((disk_t *dp, int level)); static int next_level0 P((disk_t *dp, info_t *info)); static int runs_at P((info_t *info, int lev)); -static long bump_thresh P((int level)); +static long bump_thresh P((int level, long size_level_0, int bumppercent, int bumpsize, double bumpmult)); static int when_overwrite P((char *label)); static void askfor(ep, seq, lev, info) @@ -601,8 +609,6 @@ int seq; /* sequence number of request */ int lev; /* dump level being requested */ info_t *info; /* info block for disk */ { - stats_t *stat; - if(seq < 0 || seq >= MAX_LEVELS) { error("error [planner askfor: seq out of range 0..%d: %d]", MAX_LEVELS, seq); @@ -615,7 +621,7 @@ info_t *info; /* info block for disk */ if (lev == -1) { ep->level[seq] = -1; ep->dumpdate[seq] = (char *)0; - ep->est_size[seq] = -1; + ep->est_size[seq] = -2; return; } @@ -624,9 +630,7 @@ info_t *info; /* info block for disk */ ep->dumpdate[seq] = stralloc(get_dumpdate(info,lev)); malloc_mark(ep->dumpdate[seq]); - stat = &info->inf[lev]; - if(stat->date == EPOCH) ep->est_size[seq] = -1; - else ep->est_size[seq] = stat->size; + ep->est_size[seq] = -2; return; } @@ -891,8 +895,8 @@ setup_estimate(dp) * last night, we can't bump. */ if((info.inf[curr_level].size == 0 || /* no data, try it anyway */ - (((info.inf[curr_level].size > bump_thresh(curr_level))) - && ep->level_days >= conf_bumpdays)) + (((info.inf[curr_level].size > bump_thresh(curr_level, info.inf[0].size,dp->bumppercent, dp->bumpsize, dp->bumpmult))) + && ep->level_days >= dp->bumpdays)) && curr_level + 1 < DUMP_LEVELS) { askfor(ep, i++, curr_level+1, &info); } @@ -905,15 +909,13 @@ setup_estimate(dp) /* debug output */ - fprintf(stderr, "setup_estimate: %s:%s: command %d, options:", - dp->host->hostname, dp->name, info.command); - if(dp->strategy == DS_NOFULL) fputs(" no-full", stderr); - if(dp->strategy == DS_INCRONLY) fputs(" incr-only", stderr); - if(dp->skip_full) fputs(" skip-full", stderr); - if(dp->skip_incr) fputs(" skip-incr", stderr); - fprintf(stderr, "\n last_level %d next_level0 %d level_days %d\n", - ep->last_level, ep->next_level0, ep->level_days); - fprintf(stderr, " getting estimates %d (%ld) %d (%ld) %d (%ld)\n", + fprintf(stderr, "setup_estimate: %s:%s: command %d, options: %s last_level %d next_level0 %d level_days %d getting estimates %d (%ld) %d (%ld) %d (%ld)\n", + dp->host->hostname, dp->name, info.command, + dp->strategy == DS_NOFULL ? "no-full" : + dp->strategy == DS_INCRONLY ? "incr-only" : + dp->skip_full ? "skip-full" : + dp->skip_incr ? "skip-incr" : "none", + ep->last_level, ep->next_level0, ep->level_days, ep->level[0], ep->est_size[0], ep->level[1], ep->est_size[1], ep->level[2], ep->est_size[2]); @@ -1079,13 +1081,22 @@ int lev; } -static long bump_thresh(level) +static long bump_thresh(level, size_level_0, bumppercent, bumpsize, bumpmult) int level; +long size_level_0; +int bumppercent; +int bumpsize; +double bumpmult; { double bump; - bump = conf_bumpsize; - while(--level) bump = bump * conf_bumpmult; + if(bumppercent != 0 && size_level_0 > 1024) { + bump = (size_level_0 * bumppercent)/100.0; + } + else { + bump = bumpsize; + } + while(--level) bump = bump * bumpmult; return (long)bump; } @@ -1098,14 +1109,14 @@ int level; * */ -static void getsize P((host_t *hostp)); -static disk_t *lookup_hostdisk P((host_t *hp, char *str)); +static void getsize P((am_host_t *hostp)); +static disk_t *lookup_hostdisk P((am_host_t *hp, char *str)); static void handle_result P((proto_t *p, pkt_t *pkt)); static void get_estimates P((void)) { - host_t *hostp; + am_host_t *hostp; disk_t *dp; struct servent *amandad; int something_started; @@ -1146,16 +1157,80 @@ static void get_estimates P((void)) est(dp)->errstr = "hmm, disk was stranded on waitq"; enqueue_disk(&failq, dp); } + + while(!empty(pestq)) { + disk_t *dp = dequeue_disk(&pestq); + + if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < 0) { + if(est(dp)->est_size[0] == -1) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[0], est(dp)->est_size[0]); + } + else { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d timed out: %d.", + dp->host->hostname, dp->name, + est(dp)->level[0], est(dp)->est_size[0]); + } + est(dp)->level[0] = -1; + } + + if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < 0) { + if(est(dp)->est_size[1] == -1) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[1], est(dp)->est_size[1]); + } + else { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d timed out: %d.", + dp->host->hostname, dp->name, + est(dp)->level[1], est(dp)->est_size[1]); + } + est(dp)->level[1] = -1; + } + + if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < 0) { + if(est(dp)->est_size[2] == -1) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[2], est(dp)->est_size[2]); + } + else { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d timed out: %d.", + dp->host->hostname, dp->name, + est(dp)->level[2], est(dp)->est_size[2]); + } + est(dp)->level[2] = -1; + } + + if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > 0) || + (est(dp)->level[1] != -1 && est(dp)->est_size[1] > 0) || + (est(dp)->level[2] != -1 && est(dp)->est_size[2] > 0)) { + enqueue_disk(&estq, dp); + } + else { + est(dp)->errstr = vstralloc("disk ", dp->name, + ", all estimate timed out", NULL); + enqueue_disk(&failq, dp); + } + } } static void getsize(hostp) -host_t *hostp; +am_host_t *hostp; { disklist_t *destqp; disk_t *dp; char *req = NULL, *errstr = NULL; int i, estimates, rc, timeout, disk_state, req_len; char number[NUM_STR_SIZE]; + char *calcsize; assert(hostp->disks != NULL); @@ -1172,6 +1247,9 @@ host_t *hostp; */ if(hostp->features != NULL) { /* sendsize service */ + int nb_client = 0; + int nb_server = 0; + int has_features = am_has_feature(hostp->features, fe_req_options_features); int has_hostname = am_has_feature(hostp->features, @@ -1201,84 +1279,208 @@ host_t *hostp; int s_len = 0; if(dp->todo == 0) continue; - + if(est(dp)->state != DISK_READY) { continue; } + est(dp)->got_estimate = 0; if(est(dp)->level[0] == -1) { est(dp)->state = DISK_DONE; continue; /* ignore this disk */ } - for(i = 0; i < MAX_LEVELS; i++) { - char *l; - char *exclude1 = ""; - char *exclude2 = ""; - char *excludefree = NULL; - char spindle[NUM_STR_SIZE]; - char level[NUM_STR_SIZE]; - int lev = est(dp)->level[i]; + if(dp->estimate == ES_CLIENT || + dp->estimate == ES_CALCSIZE) { + nb_client++; - if(lev == -1) break; + for(i = 0; i < MAX_LEVELS; i++) { + char *l; + char *exclude1 = ""; + char *exclude2 = ""; + char *excludefree = NULL; + char spindle[NUM_STR_SIZE]; + char level[NUM_STR_SIZE]; + int lev = est(dp)->level[i]; - ap_snprintf(level, sizeof(level), "%d", lev); - ap_snprintf(spindle, sizeof(spindle), "%d", dp->spindle); - if(am_has_feature(hostp->features, fe_sendsize_req_options)) { - exclude1 = " OPTIONS |"; - exclude2 = optionstr(dp, hostp->features, NULL); - excludefree = exclude2; - } - else { - if(dp->exclude_file && dp->exclude_file->nb_element == 1) { - exclude1 = " exclude-file="; - exclude2 = dp->exclude_file->first->name; + if(lev == -1) break; + + ap_snprintf(level, sizeof(level), "%d", lev); + ap_snprintf(spindle, sizeof(spindle), "%d", dp->spindle); + if(am_has_feature(hostp->features,fe_sendsize_req_options)){ + exclude1 = " OPTIONS |"; + exclude2 = optionstr(dp, hostp->features, NULL); + excludefree = exclude2; } - else if(dp->exclude_list - && dp->exclude_list->nb_element == 1) { - exclude1 = " exclude-list="; - exclude2 = dp->exclude_list->first->name; + else { + if(dp->exclude_file && + dp->exclude_file->nb_element == 1) { + exclude1 = " exclude-file="; + exclude2 = dp->exclude_file->first->name; + } + else if(dp->exclude_list && + dp->exclude_list->nb_element == 1) { + exclude1 = " exclude-list="; + exclude2 = dp->exclude_list->first->name; + } } + if(dp->estimate == ES_CALCSIZE && + !am_has_feature(hostp->features, fe_calcsize_estimate)) { + log_add(L_WARNING,"%s:%s does not support CALCSIZE for estimate, using CLIENT.\n", + hostp->hostname, dp->name); + dp->estimate = ES_CLIENT; + } + if(dp->estimate == ES_CLIENT) + calcsize = ""; + else + calcsize = "CALCSIZE "; + + if(dp->device) { + l = vstralloc(calcsize, + dp->program, " ", + dp->name, " ", + dp->device, " ", + level, " ", + est(dp)->dumpdate[i], " ", spindle, + exclude1, + exclude2, + "\n", + NULL); + } + else { + l = vstralloc(calcsize, + dp->program, " ", + dp->name, " ", + level, " ", + est(dp)->dumpdate[i], " ", spindle, + exclude1, + exclude2, + "\n", + NULL); + } + amfree(excludefree); + strappend(s, l); + s_len += strlen(l); + amfree(l); } - if(dp->device) { - l = vstralloc(dp->program, " ", - dp->name, " ", - dp->device, " ", - level, " ", - est(dp)->dumpdate[i], " ", spindle, - exclude1, - exclude2, - "\n", - NULL); - } - else { - l = vstralloc(dp->program, " ", dp->name, " ", level, " ", - est(dp)->dumpdate[i], " ", spindle, - exclude1, - exclude2, - "\n", - NULL); + /* + * Allow 2X for err response. + */ + if(req_len + s_len > MAX_DGRAM / 2) { + amfree(s); + break; } - amfree(excludefree); - strappend(s, l); - s_len += strlen(l); - amfree(l); - } - /* - * Allow 2X for err response. - */ - if(req_len + s_len > MAX_DGRAM / 2) { + estimates += i; + strappend(req, s); + req_len += s_len; amfree(s); - break; + est(dp)->state = DISK_ACTIVE; + remove_disk(&startq, dp); + } + else if (dp->estimate == ES_SERVER) { + info_t info; + + nb_server++; + get_info(dp->host->hostname, dp->name, &info); + for(i = 0; i < MAX_LEVELS; i++) { + int j; + int lev = est(dp)->level[i]; + + if(lev == -1) break; + if(lev == 0) { /* use latest level 0, should do extrapolation */ + long est_size = 0; + int nb_est = 0; + + for(j=NB_HISTORY-2;j>=0;j--) { + if(info.history[j].level == 0) { + est_size = info.history[j].size; + nb_est++; + } + } + if(nb_est > 0) { + est(dp)->est_size[i] = est_size; + } + else if(info.inf[lev].size > 1000) { /* stats */ + est(dp)->est_size[i] = info.inf[lev].size; + } + else { + est(dp)->est_size[i] = 1000000; + } + } + else if(lev == est(dp)->last_level) { + /* means of all X day at the same level */ + #define NB_DAY 30 + int nb_day = 0; + long est_size_day[NB_DAY]; + int nb_est_day[NB_DAY]; + + for(j=0;j=0;j--) { + if(info.history[j].level <= 0) continue; + if(info.history[j].level == info.history[j+1].level) { + if(nb_day NB_DAY-1) nb_day = NB_DAY-1; + + while(nb_day > 0 && nb_est_day[nb_day] == 0) nb_day--; + + if(nb_est_day[nb_day] > 0) { + est(dp)->est_size[i] = + est_size_day[nb_day] / nb_est_day[nb_day]; + } + else if(info.inf[lev].size > 1000) { /* stats */ + est(dp)->est_size[i] = info.inf[lev].size; + } + else { + est(dp)->est_size[i] = 10000; + } + } + else if(lev == est(dp)->last_level + 1) { + /* means of all first day at a new level */ + long est_size = 0; + int nb_est = 0; + + for(j=NB_HISTORY-2;j>=0;j--) { + if(info.history[j].level <= 0) continue; + if(info.history[j].level == info.history[j+1].level + 1 ) { + est_size += info.history[j].size; + nb_est++; + } + } + if(nb_est > 0) { + est(dp)->est_size[i] = est_size / nb_est; + } + else if(info.inf[lev].size > 1000) { /* stats */ + est(dp)->est_size[i] = info.inf[lev].size; + } + else { + est(dp)->est_size[i] = 100000; + } + } + } + fprintf(stderr,"%s time %s: got result for host %s disk %s:", + get_pname(), walltime_str(curclock()), + dp->host->hostname, dp->name); + fprintf(stderr," %d -> %ldK, %d -> %ldK, %d -> %ldK\n", + est(dp)->level[0], est(dp)->est_size[0], + est(dp)->level[1], est(dp)->est_size[1], + est(dp)->level[2], est(dp)->est_size[2]); + est(dp)->state = DISK_DONE; + remove_disk(&startq, dp); + enqueue_disk(&estq, dp); } - estimates += i; - strappend(req, s); - req_len += s_len; - amfree(s); - est(dp)->state = DISK_ACTIVE; - remove_disk(&startq, dp); } - if(estimates == 0) { amfree(req); hostp->up = HOST_DONE; @@ -1342,7 +1544,7 @@ host_t *hostp; } static disk_t *lookup_hostdisk(hp, str) -host_t *hp; +am_host_t *hp; char *str; { disk_t *dp; @@ -1361,7 +1563,7 @@ pkt_t *pkt; int level, i; long size; disk_t *dp; - host_t *hostp; + am_host_t *hostp; char *msgdisk=NULL, *msgdisk_undo=NULL, msgdisk_undo_ch = '\0'; char *errbuf = NULL; char *line; @@ -1371,7 +1573,7 @@ pkt_t *pkt; int ch; int tch; - hostp = (host_t *) p->datap; + hostp = (am_host_t *) p->datap; hostp->up = HOST_READY; if(p->state == S_FAILED && pkt == NULL) { @@ -1514,63 +1716,87 @@ pkt_t *pkt; for(dp = hostp->disks; dp != NULL; dp = dp->hostnext) { if(dp->todo == 0) continue; - if(est(dp)->state != DISK_ACTIVE) continue; - est(dp)->state = DISK_DONE; + if(est(dp)->state != DISK_ACTIVE && + est(dp)->state != DISK_PARTIALY_DONE) continue; + + if(est(dp)->state == DISK_ACTIVE) { + remove_disk(&waitq, dp); + } + else if(est(dp)->state == DISK_PARTIALY_DONE) { + remove_disk(&pestq, dp); + } + + if(pkt->type == P_REP) { + est(dp)->state = DISK_DONE; + } + else if(pkt->type == P_PREP) { + est(dp)->state = DISK_PARTIALY_DONE; + } + if(est(dp)->level[0] == -1) continue; /* ignore this disk */ - remove_disk(&waitq, dp); - if(est(dp)->got_estimate) { - fprintf(stderr,"%s: time %s: got result for host %s disk %s:", - get_pname(), walltime_str(curclock()), - dp->host->hostname, dp->name); - fprintf(stderr," %d -> %ldK, %d -> %ldK, %d -> %ldK\n", - est(dp)->level[0], est(dp)->est_size[0], - est(dp)->level[1], est(dp)->est_size[1], - est(dp)->level[2], est(dp)->est_size[2]); - - if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > 0) || - (est(dp)->level[1] != -1 && est(dp)->est_size[1] > 0) || - (est(dp)->level[2] != -1 && est(dp)->est_size[2] > 0)) { - - if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < 0) { - log_add(L_WARNING, - "disk %s:%s, estimate of level %d failed: %d.", - dp->host->hostname, dp->name, - est(dp)->level[2], est(dp)->est_size[2]); - est(dp)->level[2] = -1; - } - if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < 0) { - log_add(L_WARNING, - "disk %s:%s, estimate of level %d failed: %d.", - dp->host->hostname, dp->name, - est(dp)->level[1], est(dp)->est_size[1]); - est(dp)->level[1] = -1; - } - if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < 0) { - log_add(L_WARNING, - "disk %s:%s, estimate of level %d failed: %d.", - dp->host->hostname, dp->name, - est(dp)->level[0], est(dp)->est_size[0]); - est(dp)->level[0] = -1; - } - enqueue_disk(&estq, dp); + + if(pkt->type == P_PREP) { + fprintf(stderr,"%s: time %s: got partial result for host %s disk %s:", + get_pname(), walltime_str(curclock()), + dp->host->hostname, dp->name); + fprintf(stderr," %d -> %ldK, %d -> %ldK, %d -> %ldK\n", + est(dp)->level[0], est(dp)->est_size[0], + est(dp)->level[1], est(dp)->est_size[1], + est(dp)->level[2], est(dp)->est_size[2]); + enqueue_disk(&pestq, dp); + } + else if(pkt->type == P_REP) { + fprintf(stderr,"%s: time %s: got result for host %s disk %s:", + get_pname(), walltime_str(curclock()), + dp->host->hostname, dp->name); + fprintf(stderr," %d -> %ldK, %d -> %ldK, %d -> %ldK\n", + est(dp)->level[0], est(dp)->est_size[0], + est(dp)->level[1], est(dp)->est_size[1], + est(dp)->level[2], est(dp)->est_size[2]); + if((est(dp)->level[0] != -1 && est(dp)->est_size[0] > 0) || + (est(dp)->level[1] != -1 && est(dp)->est_size[1] > 0) || + (est(dp)->level[2] != -1 && est(dp)->est_size[2] > 0)) { + + if(est(dp)->level[2] != -1 && est(dp)->est_size[2] < 0) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[2], est(dp)->est_size[2]); + est(dp)->level[2] = -1; + } + if(est(dp)->level[1] != -1 && est(dp)->est_size[1] < 0) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[1], est(dp)->est_size[1]); + est(dp)->level[1] = -1; + } + if(est(dp)->level[0] != -1 && est(dp)->est_size[0] < 0) { + log_add(L_WARNING, + "disk %s:%s, estimate of level %d failed: %d.", + dp->host->hostname, dp->name, + est(dp)->level[0], est(dp)->est_size[0]); + est(dp)->level[0] = -1; + } + enqueue_disk(&estq, dp); } else { enqueue_disk(&failq, dp); - est(dp)->errstr = vstralloc("disk ", dp->name, - ", all estimate failed", NULL); + if(est(dp)->got_estimate) { + est(dp)->errstr = vstralloc("disk ", dp->name, + ", all estimate failed", NULL); + } + else { + fprintf(stderr, "error result for host %s disk %s: missing estimate\n", + dp->host->hostname, dp->name); + est(dp)->errstr = vstralloc("missing result for ", dp->name, + " in ", dp->host->hostname, + " response", + NULL); + } } } - else { - enqueue_disk(&failq, dp); - - fprintf(stderr, "error result for host %s disk %s: missing estimate\n", - dp->host->hostname, dp->name); - - est(dp)->errstr = vstralloc("missing result for ", dp->name, - " in ", dp->host->hostname, " response", - NULL); - } } getsize(hostp); return; @@ -1825,15 +2051,15 @@ disk_t *dp; return base_level; } - thresh = bump_thresh(base_level); + thresh = bump_thresh(base_level, est_size(dp, 0), dp->bumppercent, dp->bumpsize, dp->bumpmult); fprintf(stderr, " pick: size %ld level %d days %d (thresh %ldK, %d days)\n", base_size, base_level, est(dp)->level_days, - thresh, conf_bumpdays); + thresh, dp->bumpdays); if(base_level == 9 - || est(dp)->level_days < conf_bumpdays + || est(dp)->level_days < dp->bumpdays || base_size <= thresh) return base_level; @@ -1894,6 +2120,8 @@ static void delay_dumps P((void)) char est_kb[20]; /* Text formatted dump size */ int nb_forced_level_0; info_t info; + int delete; + char *message; biq.head = biq.tail = NULL; @@ -1919,30 +2147,29 @@ static void delay_dumps P((void)) ap_snprintf(est_kb, 20, "%ld KB,", est(dp)->dump_size); if(est(dp)->dump_level == 0) { - if(est(dp)->last_level == -1 || dp->skip_incr) { - delay_one_dump(dp, 1, - "dump larger than tape,", - est_kb, - "but cannot incremental dump", - dp->skip_incr ? "skip-incr": "new", - "disk", - NULL); + if(dp->skip_incr) { + delete = 1; + message = "but cannot incremental dump skip-incr disk"; + } + else if(est(dp)->last_level < 0) { + delete = 1; + message = "but cannot incremental dump new disk"; + } + else if(est(dp)->degr_level < 0) { + delete = 1; + message = "but no incremental estimate"; } else { - delay_one_dump(dp, 0, - "dump larger than tape,", - est_kb, - "full dump delayed", - NULL); + delete = 0; + message = "full dump delayed"; } } else { - delay_one_dump(dp, 1, - "dump larger than tape,", - est_kb, - "skipping incremental", - NULL); + delete = 1; + message = "skipping incremental"; } + delay_one_dump(dp, delete, "dump larger than tape,", est_kb, + message, NULL); } /* @@ -1984,22 +2211,24 @@ static void delay_dumps P((void)) /* Format dumpsize for messages */ ap_snprintf(est_kb, 20, "%ld KB,", est(dp)->dump_size); - if(est(dp)->last_level == -1 || dp->skip_incr) { - delay_one_dump(dp, 1, - "dumps too big,", - est_kb, - "but cannot incremental dump", - dp->skip_incr ? "skip-incr": "new", - "disk", - NULL); + if(dp->skip_incr) { + delete = 1; + message = "but cannot incremental dump skip-incr disk"; + } + else if(est(dp)->last_level < 0) { + delete = 1; + message = "but cannot incremental dump new disk"; + } + else if(est(dp)->degr_level < 0) { + delete = 1; + message = "but no incremental estimate"; } else { - delay_one_dump(dp, 0, - "dumps too big,", - est_kb, - "full dump delayed", - NULL); + delete = 0; + message = "full dump delayed"; } + delay_one_dump(dp, delete, "dumps too big,", est_kb, + message, NULL); } } @@ -2015,22 +2244,24 @@ static void delay_dumps P((void)) /* Format dumpsize for messages */ ap_snprintf(est_kb, 20, "%ld KB,", est(dp)->dump_size); - if(est(dp)->last_level == -1 || dp->skip_incr) { - delay_one_dump(dp, 1, - "dumps too big,", - est_kb, - "but cannot incremental dump", - dp->skip_incr ? "skip-incr": "new", - "disk", - NULL); + if(dp->skip_incr) { + delete = 1; + message = "but cannot incremental dump skip-incr disk"; + } + else if(est(dp)->last_level < 0) { + delete = 1; + message = "but cannot incremental dump new disk"; + } + else if(est(dp)->degr_level < 0) { + delete = 1; + message = "but no incremental estimate"; } else { - delay_one_dump(dp, 0, - "dumps too big,", - est_kb, - "full dump delayed", - NULL); + delete = 0; + message = "full dump delayed"; } + delay_one_dump(dp, delete, "dumps too big,", est_kb, + message, NULL); } } } @@ -2321,29 +2552,30 @@ static int promote_hills P((void)) int disks; long size; } *sp = NULL; - int tapecycle; int days; int hill_days = 0; long hill_size; long new_size; long new_total; + int my_dumpcycle; /* If we are already doing a level 0 don't bother */ if(total_lev0 > 0) return 0; /* Do the guts of an "amadmin balance" */ - tapecycle = conf_tapecycle; + my_dumpcycle = conf_dumpcycle; + if(my_dumpcycle > 10000) my_dumpcycle = 10000; sp = (struct balance_stats *) - alloc(sizeof(struct balance_stats) * tapecycle); + alloc(sizeof(struct balance_stats) * my_dumpcycle); - for(days = 0; days < tapecycle; days++) + for(days = 0; days < my_dumpcycle; days++) sp[days].disks = sp[days].size = 0; for(dp = schedq.head; dp != NULL; dp = dp->next) { days = est(dp)->next_level0; /* This is > 0 by definition */ - if(daysskip_full && dp->strategy != DS_NOFULL && + if(daysskip_full && dp->strategy != DS_NOFULL && dp->strategy != DS_INCRONLY) { sp[days].disks++; sp[days].size += est(dp)->last_lev0size; @@ -2354,7 +2586,7 @@ static int promote_hills P((void)) while(1) { /* Find the tallest hill */ hill_size = 0; - for(days = 0; days < tapecycle; days++) { + for(days = 0; days < my_dumpcycle; days++) { if(sp[days].disks > 1 && sp[days].size > hill_size) { hill_size = sp[days].size; hill_days = days;