2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-2000 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: driver.c,v 1.58.2.31.2.8.2.21 2004/04/26 15:02:47 martinea Exp $
29 * controlling process for the Amanda backup system
33 * XXX possibly modify tape queue to be cognizant of how much room is left on
34 * tape. Probably not effective though, should do this in planner.
47 #include "server_util.h"
/*
 * File-scope state, forward declarations and idle-reason constants for
 * the driver.
 *
 * Queues: waitq and runq are filled by read_schedule(); runq is what
 * start_some_dumps() scans; tapeq holds dumps waiting to be written by
 * the taper; roomq holds dumps waiting for more holding-disk space (see
 * continue_dumps()).
 *
 * sleep_time is the timeout handed to select() in main(); it is reset to
 * SLEEP_MAX and shortened in start_some_dumps() when a disk or host has a
 * start time in the future.
 *
 * NOTE(review): the IDLE_ constants look like indices into idle_strings,
 * but only one string entry ("client-constrained") and no closing brace
 * are visible in this listing -- confirm against the full file.
 * NOTE(review): P(( )) is presumably a prototype-compatibility macro from
 * a project header; it is not defined in the visible lines.
 */
49 disklist_t waitq, runq, tapeq, roomq;
50 int pending_aborts, inside_dump_to_tape;
53 unsigned long reserved_space;
54 unsigned long total_disksize;
58 long tape_length, tape_left = 0;
60 host_t *flushhost = NULL;
62 int client_constrained P((disk_t *dp));
63 int sort_by_priority_reversed P((disk_t *a, disk_t *b));
64 int sort_by_time P((disk_t *a, disk_t *b));
65 int start_some_dumps P((disklist_t *rq));
66 void dump_schedule P((disklist_t *qp, char *str));
67 void start_degraded_mode P((disklist_t *queuep));
68 void handle_taper_result P((void));
69 dumper_t *idle_dumper P((void));
70 int some_dumps_in_progress P((void));
71 int num_busy_dumpers P((void));
72 dumper_t *lookup_dumper P((int fd));
73 void handle_dumper_result P((int fd));
74 void read_flush P((disklist_t *tapeqp));
75 void read_schedule P((disklist_t *waitqp, disklist_t *runqp));
76 int free_kps P((interface_t *ip));
77 void interface_state P((char *time_str));
78 void allocate_bandwidth P((interface_t *ip, int kps));
79 void deallocate_bandwidth P((interface_t *ip, int kps));
80 unsigned long free_space P((void));
81 assignedhd_t **find_diskspace P((unsigned long size, int *cur_idle, assignedhd_t *preferred));
82 char *diskname2filename P((char *dname));
83 int assign_holdingdisk P((assignedhd_t **holdp, disk_t *diskp));
84 static void adjust_diskspace P((disk_t *diskp, cmd_t cmd));
85 static void delete_diskspace P((disk_t *diskp));
86 assignedhd_t **build_diskspace P((char *destname));
87 void holdingdisk_state P((char *time_str));
88 int dump_to_tape P((disk_t *dp));
89 int queue_length P((disklist_t q));
90 void short_dump_state P((void));
91 void dump_state P((char *str));
92 void startaflush P((void));
93 int main P((int main_argc, char **main_argv));
95 static int idle_reason;
98 char *idle_strings[] = {
101 #define IDLE_START_WAIT 1
103 #define IDLE_NO_DUMPERS 2
105 #define IDLE_NO_HOLD 3
107 #define IDLE_CLIENT_CONSTRAINED 4
108 "client-constrained",
109 #define IDLE_NO_DISKSPACE 5
111 #define IDLE_TOO_LARGE 6
113 #define IDLE_NO_BANDWIDTH 7
115 #define IDLE_TAPER_WAIT 8
119 #define SLEEP_MAX (24*3600)
120 struct timeval sleep_time = { SLEEP_MAX, 0 };
121 /* enabled if any disks are in start-wait: */
122 int any_delayed_disk = 0;
/*
 * main -- entry point of the driver, the controlling process of a run.
 *
 * Visible flow, in order:
 *   1. Walk descriptors 3..FD_SETSIZE-1 (loop body not visible), ignore
 *      SIGPIPE, set up error logging, print a pid/version banner.
 *   2. Take the configuration name from main_argv[1], or derive
 *      config_dir/config_name from the current working directory (the
 *      condition choosing between the two is not visible).  main_argv[2]
 *      is compared against "nodump"; later steps are guarded by !nodump.
 *   3. Read the config file, build the datestamp, log L_START.
 *   4. Start the taper early and send START_TAPER, then read the disklist.
 *   5. For each configured holding disk: allocate its holdalloc record,
 *      check it with get_fs_stats() and access(W_OK), adjust disksize
 *      against fs.avail, create the dated holding directory and add the
 *      size to total_disksize.  reserved_space is then
 *      total_disksize * reserve / 100.
 *   6. Cap inparallel at MAX_DUMPERS, start the dumpers and read the
 *      schedule (both skipped when nodump is set).
 *   7. Read the taper's first result; anything other than TAPER_OK puts
 *      the driver in degraded mode and removes the taper fd from readset.
 *   8. Loop: start_some_dumps(), select() on readset with sleep_time as
 *      timeout, dispatch each ready fd to handle_taper_result() or
 *      handle_dumper_result() until areads has no buffered data left.
 *   9. Drain runq: try dump_to_tape() or log L_FAIL for the disk.
 *  10. Send QUIT to every dumper and the taper, wait() for all children
 *      and report any that did not exit with status 0.
 *  11. Free dumper names, clean up holding directories, log L_FINISH and
 *      compare malloc usage against the snapshot taken at startup.
 *
 * Parameters (K&R style; their declarations are not visible here, types
 * per the prototype): main_argc, main_argv -- program arguments.
 * Returns: int; the return statement is not visible in this listing.
 *
 * NOTE(review): reserved_space and the result of free_space() are
 * unsigned long but are printed with %ld in the "reserving" message.
 * NOTE(review): this listing omits many original lines (braces, else
 * branches, local declarations); read branch structure with care.
 */
124 int main(main_argc, main_argv)
134 generic_fs_stats_t fs;
136 unsigned long malloc_hist_1, malloc_size_1;
137 unsigned long malloc_hist_2, malloc_size_2;
138 unsigned long reserve = 100;
143 char *result_argv[MAX_ARGS+1];
149 for(fd = 3; fd < FD_SETSIZE; fd++) {
151 * Make sure nobody spoofs us with a lot of extra open files
152 * that would cause an open we do to get a very high file
153 * descriptor, which in turn might be used as an index into
154 * an array (e.g. an fd_set).
161 signal(SIGPIPE, SIG_IGN);
163 malloc_size_1 = malloc_inuse(&malloc_hist_1);
165 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
166 set_logerror(logerror);
171 printf("%s: pid %ld executable %s version %s\n",
172 get_pname(), (long) getpid(), main_argv[0], version());
175 config_name = stralloc(main_argv[1]);
176 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
178 if(strncmp(main_argv[2], "nodump", 6) == 0) {
184 char my_cwd[STR_SIZE];
186 if (getcwd(my_cwd, sizeof(my_cwd)) == NULL) {
187 error("cannot determine current working directory");
189 config_dir = stralloc2(my_cwd, "/");
190 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
191 config_name = stralloc(config_name + 1);
197 conffile = stralloc2(config_dir, CONFFILE_NAME);
198 if(read_conffile(conffile)) {
199 error("errors processing config file \"%s\"", conffile);
204 datestamp = construct_datestamp(NULL);
205 log_add(L_START,"date %s", datestamp);
207 taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
208 dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
211 conf_taperalgo = getconf_int(CNF_TAPERALGO);
212 conf_tapetype = getconf_str(CNF_TAPETYPE);
213 tape = lookup_tapetype(conf_tapetype);
214 tape_length = tape->length;
215 printf("driver: tape size %ld\n", tape_length);
217 /* taper takes a while to get going, so start it up right away */
220 startup_tape_process(taper_program);
221 taper_cmd(START_TAPER, datestamp, NULL, 0, NULL);
223 /* start initializing: read in databases */
225 conf_diskfile = getconf_str(CNF_DISKFILE);
226 if (*conf_diskfile == '/') {
227 conf_diskfile = stralloc(conf_diskfile);
229 conf_diskfile = stralloc2(config_dir, conf_diskfile);
231 if((origqp = read_diskfile(conf_diskfile)) == NULL) {
232 error("could not load disklist \"%s\"", conf_diskfile);
234 amfree(conf_diskfile);
236 /* set up any configuration-dependent variables */
238 inparallel = getconf_int(CNF_INPARALLEL);
240 reserve = getconf_int(CNF_RESERVE);
243 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
244 hdp->up = (void *)alloc(sizeof(holdalloc_t));
245 holdalloc(hdp)->allocated_dumpers = 0;
246 holdalloc(hdp)->allocated_space = 0L;
248 if(get_fs_stats(hdp->diskdir, &fs) == -1
249 || access(hdp->diskdir, W_OK) == -1) {
250 log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
251 hdp->diskdir, strerror(errno));
257 if(hdp->disksize > 0) {
258 if(hdp->disksize > fs.avail) {
260 "WARNING: %s: %ld KB requested, but only %ld KB available.",
261 hdp->diskdir, hdp->disksize, fs.avail);
262 hdp->disksize = fs.avail;
265 else if(fs.avail + hdp->disksize < 0) {
267 "WARNING: %s: not %ld KB free.",
268 hdp->diskdir, -hdp->disksize);
273 hdp->disksize += fs.avail;
276 printf("driver: adding holding disk %d dir %s size %ld\n",
277 dsk, hdp->diskdir, hdp->disksize);
279 newdir = newvstralloc(newdir,
280 hdp->diskdir, "/", datestamp,
282 if(!mkholdingdir(newdir)) {
285 total_disksize += hdp->disksize;
288 reserved_space = total_disksize * (reserve / 100.0);
290 printf("reserving %ld out of %ld for degraded-mode dumps\n",
291 reserved_space, free_space());
295 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
297 /* fire up the dumpers now while we are waiting */
299 if(!nodump) startup_dump_processes(dumper_program, inparallel);
302 * Read schedule from stdin. Usually, this is a pipe from planner,
303 * so the effect is that we wait here for the planner to
304 * finish, but meanwhile the taper is rewinding the tape, reading
305 * the label, checking it, writing a new label and all that jazz
306 * in parallel with the planner.
310 tapeq.head = tapeq.tail = NULL;
311 roomq.head = roomq.tail = NULL;
312 runq.head = runq.tail = NULL;
315 if(!nodump) read_schedule(&waitq, &runq);
317 log_add(L_STATS, "startup time %s", walltime_str(curclock()));
319 printf("driver: start time %s inparallel %d bandwidth %d diskspace %lu",
320 walltime_str(curclock()), inparallel, free_kps((interface_t *)0),
322 printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
323 "OBSOLETE", datestamp, taperalgo2str(conf_taperalgo),
324 getconf_str(CNF_DUMPORDER));
327 /* ok, planner is done, now lets see if the tape is ready */
329 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
331 if(cmd != TAPER_OK) {
332 /* no tape, go into degraded mode: dump to holding disk */
333 start_degraded_mode(&runq);
334 FD_CLR(taper,&readset);
337 tape_left = tape_length;
342 while(start_some_dumps(&runq) || some_dumps_in_progress() ||
347 /* wait for results */
349 memcpy(&selectset, &readset, sizeof(fd_set));
350 if(select(maxfd+1, (SELECT_ARG_TYPE *)(&selectset),
351 NULL, NULL, &sleep_time) == -1)
352 error("select: %s", strerror(errno));
354 /* handle any results that have come in */
356 for(fd = 0; fd <= maxfd; fd++) {
358 * The first pass through the following loop, we have
359 * data ready for areads (called by getresult, called by
360 * handle_.*_result). But that may read more than one record,
361 * so we need to keep processing as long as areads has data.
362 * We will get control back after each record and the buffer
363 * will go empty (indicated by areads_dataready(fd) == 0)
364 * after the last one available has been processed.
366 while(FD_ISSET(fd, &selectset) || areads_dataready(fd) > 0) {
367 if(fd == taper) handle_taper_result();
368 else handle_dumper_result(fd);
369 FD_CLR(fd, &selectset);
375 /* handle any remaining dumps by dumping directly to tape, if possible */
377 while(!empty(runq)) {
378 diskp = dequeue_disk(&runq);
380 int rc = dump_to_tape(diskp);
383 "%s %s %d [dump to tape failed, will try again]",
384 diskp->host->hostname,
386 sched(diskp)->level);
388 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
389 diskp->host->hostname,
391 sched(diskp)->datestamp,
392 sched(diskp)->level);
395 log_add(L_FAIL, "%s %s %s %d [%s]",
396 diskp->host->hostname, diskp->name,
397 sched(diskp)->datestamp, sched(diskp)->level,
399 "can't dump no-hold disk in degraded mode" :
400 "no more holding disk space");
403 short_dump_state(); /* for amstatus */
405 printf("driver: QUITTING time %s telling children to quit\n",
406 walltime_str(curclock()));
410 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
411 dumper_cmd(dumper, QUIT, NULL);
416 taper_cmd(QUIT, NULL, NULL, 0, NULL);
419 /* wait for all to die */
422 char number[NUM_STR_SIZE];
428 if((pid = wait(&retstat)) == -1) {
429 if(errno == EINTR) continue;
433 if(! WIFEXITED(retstat)) {
435 code = WTERMSIG(retstat);
436 } else if(WEXITSTATUS(retstat) != 0) {
438 code = WEXITSTATUS(retstat);
441 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
442 if(pid == dumper->pid) {
443 who = stralloc(dumper->name);
447 if(who == NULL && pid == taper_pid) {
448 who = stralloc("taper");
450 if(what != NULL && who == NULL) {
451 ap_snprintf(number, sizeof(number), "%ld", (long)pid);
452 who = stralloc2("unknown pid ", number);
455 log_add(L_WARNING, "%s exited with %s %d\n", who, what, code);
456 printf("driver: %s exited with %s %d\n", who, what, code);
461 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
462 amfree(dumper->name);
465 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
466 cleanup_holdingdisk(hdp->diskdir, 0);
471 printf("driver: FINISHED time %s\n", walltime_str(curclock()));
473 log_add(L_FINISH,"date %s time %s", datestamp, walltime_str(curclock()));
476 amfree(dumper_program);
477 amfree(taper_program);
481 malloc_size_2 = malloc_inuse(&malloc_hist_2);
483 if(malloc_size_1 != malloc_size_2) {
484 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
/*
 * Body fragment of startaflush() (the definition line is not visible in
 * this listing; the name comes from the prototype and the log strings).
 *
 * Runs only when the driver is not in degraded mode, the taper is not
 * busy and tapeq is non-empty.  It picks one disk from tapeq according
 * to conf_taperalgo, sends it to the taper with FILE_WRITE, logs the
 * choice on stderr and subtracts its act_size from tape_left.
 *
 * The selection loops only consider queue entries whose datestamp
 * compares (strcmp) less than or equal to that of the queue head.
 * Several of them also require act_size <= tape_left.  When nothing was
 * selected, the head of the queue is dequeued and a message says so.
 *
 * NOTE(review): only the ALGO_LARGESTFIT case label is visible; the
 * other case labels and the loop bodies are missing from this listing.
 * NOTE(review): the log line prints sched(taper_disk)->act_size while
 * the accounting uses sched(dp)->act_size; the assignment of taper_disk
 * is not visible -- confirm both refer to the same disk.
 */
495 if(!degraded_mode && !taper_busy && !empty(tapeq)) {
496 datestamp = sched(tapeq.head)->datestamp;
497 switch(conf_taperalgo) {
499 dp = dequeue_disk(&tapeq);
503 while (fit != NULL) {
504 if(sched(fit)->act_size <= tape_left &&
505 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
513 if(dp) remove_disk(&tapeq, dp);
516 fit = dp = tapeq.head;
517 while (fit != NULL) {
518 if(sched(fit)->act_size > sched(dp)->act_size &&
519 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
524 if(dp) remove_disk(&tapeq, dp);
526 case ALGO_LARGESTFIT:
528 while (fit != NULL) {
529 if(sched(fit)->act_size <= tape_left &&
530 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
531 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
536 if(dp) remove_disk(&tapeq, dp);
539 fit = dp = tapeq.head;
540 while (fit != NULL) {
541 if(sched(fit)->act_size < sched(dp)->act_size &&
542 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
547 if(dp) remove_disk(&tapeq, dp);
551 remove_disk(&tapeq, dp);
555 dp = dequeue_disk(&tapeq); /* first if nothing fit */
557 "driver: startaflush: Using first because nothing fit\n");
561 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
562 sched(dp)->datestamp);
563 fprintf(stderr,"driver: startaflush: %s %s %s %ld %ld\n",
564 taperalgo2str(conf_taperalgo), dp->host->hostname,
565 dp->name, sched(taper_disk)->act_size, tape_left);
566 tape_left -= sched(dp)->act_size;
/*
 * client_constrained -- decide whether the client side prevents starting
 * a dump of dp right now.
 *
 * Two checks are visible: the host already has maxdumps dumps in
 * progress, or another in-progress disk on the same host shares dp's
 * spindle (a spindle of -1 never conflicts).
 *
 * dp: candidate disk.
 * Returns: int.  The return statements are not visible in this listing;
 * start_some_dumps() treats a non-zero result as "client constrained".
 */
571 int client_constrained(dp)
576 /* first, check if host is too busy */
578 if(dp->host->inprogress >= dp->host->maxdumps) {
582 /* next, check conflict with other dumps on same spindle */
584 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
588 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
589 if(dp2->inprogress && dp2->spindle == dp->spindle) {
/*
 * start_some_dumps -- hand queued disks to idle dumpers.
 *
 * On entry it resets idle_reason to IDLE_NO_DUMPERS, sleep_time to
 * SLEEP_MAX and any_delayed_disk to 0.  For every dumper that is neither
 * busy nor down it scans the run queue and rejects a disk, recording the
 * reason in cur_idle, when (in this order):
 *   - the host's or the disk's start_t is still in the future (this also
 *     shortens sleep_time and sets any_delayed_disk),
 *   - the network interface is in use and est_kps exceeds free_kps(),
 *   - the disk is flagged no_space,
 *   - find_diskspace() cannot provide est_size,
 *   - the disk is no_hold,
 *   - client_constrained() reports a conflict.
 * Among the remaining disks the choice is driven by the dumporder
 * character at this dumper's index (s/S size, t/T time, b/B bandwidth;
 * unknown characters log a warning and fall back to 's').  In degraded
 * mode a candidate must also have priority >= the current choice.
 *
 * The chosen disk gets bandwidth and holding space assigned, is marked
 * in progress, removed from rq and sent to the dumper with FILE_DUMP.
 * The host's start_t is then set to now + 15, so the start_t check above
 * holds back further dumps on that host for 15 seconds.
 *
 * rq: run queue to take disks from.
 * Returns: int, used by main() as a loop condition; the return
 * statements are not visible in this listing.
 */
596 int start_some_dumps(rq)
600 disk_t *diskp, *diskp_accept;
602 assignedhd_t **holdp=NULL, **holdp_accept;
603 time_t now = time(NULL);
606 idle_reason = IDLE_NO_DUMPERS;
607 sleep_time.tv_sec = SLEEP_MAX;
608 sleep_time.tv_usec = 0;
609 any_delayed_disk = 0;
611 if(rq->head == NULL) {
617 * A potential problem with starting from the bottom of the dump time
618 * distribution is that a slave host will have both one of the shortest
619 * and one of the longest disks, so starting its shortest disk first will
620 * tie up the host and eliminate its longest disk from consideration the
621 * first pass through. This could cause a big delay in starting that long
622 * disk, which could drag out the whole night's dumps.
624 * While starting from the top of the dump time distribution solves the
625 * above problem, this turns out to be a bad idea, because the big dumps
626 * will almost certainly pack the holding disk completely, leaving no
627 * room for even one small dump to start. This ends up shutting out the
628 * small-end dumpers completely (they stay idle).
630 * The introduction of multiple simultaneous dumps to one host alleviates
631 * the biggest&smallest dumps problem: both can be started at the
634 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
635 if(dumper->busy || dumper->down) continue;
636 /* found an idle dumper, now find a disk for it */
641 if(idle_reason == IDLE_NO_DUMPERS)
642 idle_reason = NOT_IDLE;
647 assert(diskp->host != NULL && sched(diskp) != NULL);
649 /* round estimate to next multiple of DISK_BLOCK_KB */
650 sched(diskp)->est_size = am_round(sched(diskp)->est_size,
653 if(diskp->host->start_t > now) {
654 cur_idle = max(cur_idle, IDLE_START_WAIT);
655 sleep_time.tv_sec = min(diskp->host->start_t - now,
657 any_delayed_disk = 1;
659 else if(diskp->start_t > now) {
660 cur_idle = max(cur_idle, IDLE_START_WAIT);
661 sleep_time.tv_sec = min(diskp->start_t - now,
663 any_delayed_disk = 1;
665 else if(diskp->host->netif->curusage > 0 &&
666 sched(diskp)->est_kps > free_kps(diskp->host->netif))
667 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
668 else if(sched(diskp)->no_space)
669 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
670 else if((holdp = find_diskspace(sched(diskp)->est_size,&cur_idle,NULL)) == NULL)
671 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
672 else if(diskp->no_hold) {
673 free_assignedhd(holdp);
674 cur_idle = max(cur_idle, IDLE_NO_HOLD);
675 } else if(client_constrained(diskp)) {
676 free_assignedhd(holdp);
677 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
680 /* disk fits, dump it */
681 int accept = !diskp_accept;
684 char *dumporder = getconf_str(CNF_DUMPORDER);
685 if(strlen(dumporder) <= (dumper-dmptable)) {
686 if(dumper-dmptable < 3)
692 dumptype = dumporder[dumper-dmptable];
695 case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
697 case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
699 case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
701 case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
703 case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
705 case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
707 default: log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
709 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
714 if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
715 if(holdp_accept) free_assignedhd(holdp_accept);
716 diskp_accept = diskp;
717 holdp_accept = holdp;
720 free_assignedhd(holdp);
724 free_assignedhd(holdp);
730 diskp = diskp_accept;
731 holdp = holdp_accept;
734 sched(diskp)->act_size = 0;
735 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
736 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
738 diskp->host->inprogress += 1; /* host is now busy */
739 diskp->inprogress = 1;
740 sched(diskp)->dumper = dumper;
741 sched(diskp)->timestamp = time((time_t *)0);
743 dumper->busy = 1; /* dumper is now busy */
744 dumper->dp = diskp; /* link disk to dumper */
746 remove_disk(rq, diskp); /* take it off the run queue */
747 dumper_cmd(dumper, FILE_DUMP, diskp);
748 diskp->host->start_t = time(NULL) + 15;
750 idle_reason = max(idle_reason, cur_idle);
/*
 * sort_by_priority_reversed -- comparison function passed to
 * insert_disk().
 *
 * When the scheduled priorities of a and b differ it returns b's
 * priority minus a's, so the result is negative when a has the higher
 * priority.  Equal priorities are decided by sort_by_time().
 *
 * NOTE(review): whether a negative result places a before b depends on
 * insert_disk(), which is not visible here -- confirm.
 */
755 int sort_by_priority_reversed(a, b)
758 if(sched(b)->priority - sched(a)->priority != 0)
759 return sched(b)->priority - sched(a)->priority;
761 return sort_by_time(a, b);
/*
 * sort_by_time -- comparison function on estimated dump time.
 *
 * Computes est_time of a minus est_time of b and branches on the sign of
 * the difference.  The values returned in each branch are not visible in
 * this listing.
 */
764 int sort_by_time(a, b)
769 if ((diff = sched(a)->est_time - sched(b)->est_time) < 0) {
771 } else if (diff > 0) {
/*
 * dump_schedule -- print a queue to stdout for diagnostics.
 *
 * Prints a header containing str, then one line per disk in qp with host
 * name, disk name, level, estimated time, estimated size and priority,
 * then a closing rule.
 *
 * qp:  queue to print (walked via head/next, not modified here).
 * str: label inserted into the header line.
 */
778 void dump_schedule(qp, str)
784 printf("dump of driver schedule %s:\n--------\n", str);
786 for(dp = qp->head; dp != NULL; dp = dp->next) {
787 printf(" %-10.10s %.16s lv %d t %5ld s %8lu p %d\n",
788 dp->host->hostname, dp->name, sched(dp)->level,
789 sched(dp)->est_time, sched(dp)->est_size, sched(dp)->priority);
791 printf("--------\n");
/*
 * start_degraded_mode -- rework a queue for running without a tape.
 *
 * Every disk is dequeued from *queuep and handled as follows:
 *   - level != 0: inserted unchanged into newq;
 *   - level 0 that still fits (reserved_space + est_full_size + est_size
 *     compared against a bound that is not visible here): inserted and
 *     added to est_full_size;
 *   - level 0 with degr_level != -1: level, dumpdate, est_size, est_time
 *     and est_kps are replaced by their degr_ counterparts, then inserted;
 *   - otherwise: an L_FAIL "can't switch to incremental dump" is logged.
 * Insertions use sort_by_priority_reversed.  The schedule is printed
 * before and after the rework.
 *
 * queuep: queue to rework in place.
 *
 * NOTE(review): est_full_size has no visible initialiser but is read in
 * the size check -- confirm it is set in a line missing from this listing.
 * NOTE(review): copying newq back into *queuep and setting degraded_mode
 * are presumably done in lines that are not visible -- confirm.
 */
795 void start_degraded_mode(queuep)
800 unsigned long est_full_size;
802 newq.head = newq.tail = 0;
804 dump_schedule(queuep, "before start degraded mode");
807 while(!empty(*queuep)) {
808 dp = dequeue_disk(queuep);
810 if(sched(dp)->level != 0)
811 /* go ahead and do the disk as-is */
812 insert_disk(&newq, dp, sort_by_priority_reversed);
814 if (reserved_space + est_full_size + sched(dp)->est_size
816 insert_disk(&newq, dp, sort_by_priority_reversed);
817 est_full_size += sched(dp)->est_size;
819 else if(sched(dp)->degr_level != -1) {
820 sched(dp)->level = sched(dp)->degr_level;
821 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
822 sched(dp)->est_size = sched(dp)->degr_size;
823 sched(dp)->est_time = sched(dp)->degr_time;
824 sched(dp)->est_kps = sched(dp)->degr_kps;
825 insert_disk(&newq, dp, sort_by_priority_reversed);
828 log_add(L_FAIL, "%s %s %s %d [can't switch to incremental dump]",
829 dp->host->hostname, dp->name,
830 sched(dp)->datestamp, sched(dp)->level);
838 dump_schedule(queuep, "after start degraded mode");
/*
 * continue_dumps -- resume dumps that are waiting for holding-disk space
 * and break a deadlock when none can proceed.
 *
 * Pass 1: for each disk in roomq, ask find_diskspace() for the remainder
 * (est_size - act_size), preferring the last holding disk the dump used.
 * On success the dumper whose dp is this disk is located, the new space
 * is assigned, CONTINUE is sent and the disk leaves roomq.
 *
 * Pass 2: walk the dumper table, tracking which dumpers have their disk
 * in roomq and remembering in dp the waiting disk with the smallest
 * est_size.  If no dumper is active, at least one is busy, the taper
 * cannot free space (idle with an empty tapeq, or degraded mode) and no
 * abort is pending, that disk is removed from roomq and its dumper is
 * sent ABORT.  With exactly one busy dumper the disk is also flagged
 * no_space.
 *
 * NOTE(review): active_dumpers is passed to find_diskspace() in the
 * position its prototype names cur_idle, and is later tested as a
 * counter -- confirm this reuse is intended.
 * NOTE(review): the counting statements of pass 2 are not visible in
 * this listing.
 */
841 void continue_dumps()
845 int active_dumpers=0, busy_dumpers=0, i;
848 /* First we try to grant diskspace to some dumps waiting for it. */
849 for( dp = roomq.head; dp; dp = ndp ) {
851 /* find last holdingdisk used by this dump */
852 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ );
853 /* find more space */
854 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size, &active_dumpers, h[i] );
856 for(dumper = dmptable; dumper < dmptable + inparallel &&
857 dumper->dp != dp; dumper++);
858 assert( dumper < dmptable + inparallel );
859 sched(dp)->activehd = assign_holdingdisk( h, dp );
860 dumper_cmd( dumper, CONTINUE, dp );
862 remove_disk( &roomq, dp );
866 /* So for some disks there is less holding diskspace available than
867 * was asked for. Possible reasons are
868 * a) diskspace has been allocated for other dumps which are
869 * still running or already being written to tape
870 * b) all other dumps have been suspended due to lack of diskspace
871 * c) this dump doesn't fit on all the holding disks
872 * Case a) is not a problem. We just wait for the diskspace to
873 * be freed by moving the current disk to a queue.
874 * If case b) occurs, we have a deadlock situation. We select
875 * a dump from the queue to be aborted and abort it. It will
876 * be retried later dumping to disk.
877 * If case c) is detected, the dump is aborted. Next time
878 * it will be dumped directly to tape. Actually, case c is a special
879 * manifestation of case b) where only one dumper is busy.
881 for( dp=NULL, dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
884 if( !find_disk(&roomq, dumper->dp) ) {
886 } else if( !dp || sched(dp)->est_size > sched(dumper->dp)->est_size ) {
891 if( !active_dumpers && busy_dumpers > 0 &&
892 ((!taper_busy && empty(tapeq)) || degraded_mode) &&
893 pending_aborts == 0 ) { /* not case a */
894 if( busy_dumpers == 1 ) { /* case c */
895 sched(dp)->no_space = 1;
898 /* At this time, dp points to the dump with the smallest est_size.
899 * We abort that dump, hopefully not wasting too much time retrying it.
901 remove_disk( &roomq, dp );
902 dumper_cmd( sched(dp)->dumper, ABORT, NULL );
/*
 * handle_taper_result -- read one reply from the taper and act on it.
 *
 * DONE:       requires result_argc == 5.  The disk is looked up from the
 *             serial in result_argv[2], the file number is parsed from
 *             result_argv[4] with atoi(), the taper info is updated with
 *             the label in result_argv[3], the holding space is deleted,
 *             the disk's dumpdate/degr_dumpdate/datestamp strings are
 *             freed and continue_dumps() runs.
 * TRYAGAIN:   the disk is looked up the same way.  If it was already
 *             attempted an L_FAIL "too many taper retries" is logged,
 *             otherwise attempted is incremented and the disk goes back
 *             to the head of tapeq.  tape_left is reset to tape_length
 *             and continue_dumps() runs.
 * TAPE_ERROR: prints a line for the disk, then shares the code that
 *             calls start_degraded_mode(&runq), empties tapeq and
 *             removes the taper fd from readset.  The taper fd is closed
 *             only when the command was not TAPE_ERROR.
 * other:      error() with "unexpected token".
 *
 * NOTE(review): the DONE error string has an opening '[' without a
 * closing ']', unlike the TRYAGAIN message.
 * NOTE(review): the TAPE_ERROR branch prints the same "finished-cmd ...
 * taper wrote" text as DONE -- confirm that is intended.
 * NOTE(review): some case labels and break statements are not visible in
 * this listing.
 */
907 void handle_taper_result()
913 char *result_argv[MAX_ARGS+1];
915 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
919 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
920 if(result_argc != 5) {
921 error("error: [taper DONE result_argc != 5: %d", result_argc);
924 dp = serial2disk(result_argv[2]);
925 free_serial(result_argv[2]);
927 filenum = atoi(result_argv[4]);
928 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
930 delete_diskspace(dp);
932 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
933 walltime_str(curclock()), dp->host->hostname, dp->name);
936 amfree(sched(dp)->dumpdate);
937 amfree(sched(dp)->degr_dumpdate);
938 amfree(sched(dp)->datestamp);
944 continue_dumps(); /* continue with those dumps waiting for diskspace */
947 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
948 if (result_argc < 2) {
949 error("error [taper TRYAGAIN result_argc < 2: %d]", result_argc);
951 dp = serial2disk(result_argv[2]);
952 free_serial(result_argv[2]);
953 printf("driver: taper-tryagain time %s disk %s:%s\n",
954 walltime_str(curclock()), dp->host->hostname, dp->name);
957 /* re-insert into taper queue */
959 if(sched(dp)->attempted) {
960 log_add(L_FAIL, "%s %s %d %s [too many taper retries]",
961 dp->host->hostname, dp->name, sched(dp)->level,
962 sched(dp)->datestamp);
965 sched(dp)->attempted++;
966 headqueue_disk(&tapeq, dp);
969 tape_left = tape_length;
971 /* run next thing from queue */
975 continue_dumps(); /* continue with those dumps waiting for diskspace */
979 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
980 dp = serial2disk(result_argv[2]);
981 free_serial(result_argv[2]);
982 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
983 walltime_str(curclock()), dp->host->hostname, dp->name);
985 /* Note: fall through code... */
989 * Since we've gotten a tape error, we can't send anything more
990 * to the taper. Go into degraded mode to try to get everthing
991 * onto disk. Later, these dumps can be flushed to a new tape.
992 * The tape queue is zapped so that it appears empty in future
993 * checks. If there are dumps waiting for diskspace to be freed,
998 "going into degraded mode because of tape error.");
1000 start_degraded_mode(&runq);
1003 tapeq.head = tapeq.tail = NULL;
1004 FD_CLR(taper,&readset);
1005 if(cmd != TAPE_ERROR) aclose(taper);
1009 error("driver received unexpected token (%d) from taper", cmd);
/*
 * idle_dumper -- return the first entry among the first inparallel
 * slots of dmptable that is neither busy nor down.
 *
 * Returns: pointer to that dumper.  The value returned when none
 * qualifies is not visible in this listing.
 */
1014 dumper_t *idle_dumper()
1018 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1019 if(!dumper->busy && !dumper->down) return dumper;
/*
 * some_dumps_in_progress -- report whether any dumper is busy.
 *
 * Returns: 1 as soon as a busy dumper is found among the first
 * inparallel slots of dmptable.  The other return statements are not
 * visible in this listing.
 */
1024 int some_dumps_in_progress()
1028 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1029 if(dumper->busy) return 1;
/*
 * num_busy_dumpers -- count busy dumpers.
 *
 * Adds 1 to n for each busy entry among the first inparallel slots of
 * dmptable.  The declaration/initialisation of n and the return
 * statement are not visible in this listing; presumably n starts at 0
 * and is returned.
 */
1034 int num_busy_dumpers()
1040 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1041 if(dumper->busy) n += 1;
/*
 * lookup_dumper -- find the dumper whose outfd equals fd.
 *
 * fd: file descriptor a result arrived on.
 * Returns: the matching entry among the first inparallel slots of
 * dmptable.  What happens when no entry matches is not visible in this
 * listing.
 */
1046 dumper_t *lookup_dumper(fd)
1051 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1052 if(dumper->outfd == fd) return dumper;
/*
 * handle_dumper_result -- read one reply from the dumper attached to fd
 * and update scheduling state accordingly.
 *
 * The dumper is found with lookup_dumper(fd).  If the disk has holding
 * space, h and activehd are taken from its sched record.  The reply is
 * read with getresult(); result_argv[2] carries the serial number.
 *
 * DONE:            requires result_argc == 6.  Parses origsize, dumpsize
 *                  and dumptime, updates the dumper info, renames the
 *                  holding files to their final names, recomputes the
 *                  space used on the active holding disk, releases
 *                  bandwidth and the dumper slot on that disk, and
 *                  enqueues the disk on tapeq.
 * TRYAGAIN,
 * FATAL_TRYAGAIN:  releases bandwidth and holding space.  An L_FAIL is
 *                  logged if the disk was already attempted, otherwise
 *                  attempted is incremented and the disk is re-queued on
 *                  runq.  FATAL_TRYAGAIN also restarts the dumper.
 * FAILED:          releases bandwidth and holding space; the comment
 *                  says the dumper logs the failure itself.
 * NO_ROOM:         subtracts the size in result_argv[3] from the active
 *                  chunk's used and reserved, from the disk's
 *                  allocated_space and from its disksize.
 * RQ_MORE_DISK:    moves to the next already-assigned chunk if there is
 *                  one and sends CONTINUE.  Otherwise raises est_size by
 *                  5% (rounded to DISK_BLOCK_KB) and asks
 *                  find_diskspace() for the difference.  On failure the
 *                  disk is parked on roomq, on success the space is
 *                  assigned and CONTINUE is sent.
 * ABORT_FINISHED:  asserts pending_aborts, releases bandwidth and
 *                  holding space, increments attempted and re-queues the
 *                  disk on runq.
 * otherwise:       (label not visible; comment says EOF or garbage) the
 *                  fd is removed from readset, the dumper is marked down
 *                  and any disk it was dumping is cleaned up.  An L_FAIL
 *                  is logged if it was already attempted, otherwise a
 *                  warning is logged and the disk is re-queued on runq.
 *
 * fd: descriptor of the dumper that has data ready.
 *
 * NOTE(review): NO_ROOM calls atoi(result_argv[3]) four times and the
 * value is not validated.
 * NOTE(review): the TRYAGAIN L_FAIL format has no space before '[',
 * unlike the other L_FAIL messages in this function.
 * NOTE(review): ABORT_FINISHED shows no allocated_dumpers-- while the
 * other failure paths do -- confirm whether it happens elsewhere.
 * NOTE(review): the assignment of dp, several else/break lines and the
 * sleep calls mentioned in comments are not visible in this listing.
 */
1058 void handle_dumper_result(fd)
1061 assignedhd_t **h=NULL;
1069 char *result_argv[MAX_ARGS+1];
1073 dumper = lookup_dumper(fd);
1075 assert(dp && sched(dp) && sched(dp)->destname);
1077 if(dp && sched(dp) && sched(dp)->holdp) {
1078 h = sched(dp)->holdp;
1079 activehd = sched(dp)->activehd;
1082 cmd = getresult(fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1085 sdp = serial2disk(result_argv[2]); /* result_argv[2] always contains the serial number */
1091 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <err str> */
1092 if(result_argc != 6) {
1093 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1096 free_serial(result_argv[2]);
1098 origsize = (long)atof(result_argv[3]);
1099 dumpsize = (long)atof(result_argv[4]);
1100 dumptime = (long)atof(result_argv[5]);
1101 update_info_dumper(dp, origsize, dumpsize, dumptime);
1103 /* adjust holdp[active]->used using the real dumpsize and all other
1104 * holdp[i]->used as an estimate.
1108 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1109 dummy += h[i]->used;
1112 rename_tmp_holding(sched(dp)->destname, 1);
1113 assert( h && activehd >= 0 );
1114 h[activehd]->used = size_holding_files(sched(dp)->destname) - dummy;
1115 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1116 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1117 adjust_diskspace(dp, DONE);
1119 dp->host->inprogress -= 1;
1121 sched(dp)->attempted = 0;
1122 printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1123 walltime_str(curclock()), dumper->name,
1124 dp->host->hostname, dp->name);
1127 enqueue_disk(&tapeq, dp);
1135 case TRYAGAIN: /* TRY-AGAIN <handle> <err str> */
1136 case FATAL_TRYAGAIN:
1137 free_serial(result_argv[2]);
1139 rename_tmp_holding(sched(dp)->destname, 0);
1140 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1141 assert( h && activehd >= 0 );
1142 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1143 /* Because we don't know how much was written to disk the
1144 * following functions *must* be called together!
1146 adjust_diskspace(dp, DONE);
1147 delete_diskspace(dp);
1149 dp->host->inprogress -= 1;
1152 if(sched(dp)->attempted) {
1153 log_add(L_FAIL, "%s %s %d %s[could not connect to %s]",
1154 dp->host->hostname, dp->name,
1155 sched(dp)->level, sched(dp)->datestamp, dp->host->hostname);
1157 sched(dp)->attempted++;
1158 enqueue_disk(&runq, dp);
1162 if(cmd == FATAL_TRYAGAIN) {
1163 /* dumper is confused, start another */
1164 log_add(L_WARNING, "%s (pid %ld) confused, restarting it.",
1165 dumper->name, (long)dumper->pid);
1166 FD_CLR(fd,&readset);
1168 startup_dump_process(dumper, dumper_program);
1170 /* sleep in case the dumper failed because of a temporary network
1171 problem, as NIS or NFS... */
1175 case FAILED: /* FAILED <handle> <errstr> */
1176 free_serial(result_argv[2]);
1178 rename_tmp_holding(sched(dp)->destname, 0);
1179 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1180 assert( h && activehd >= 0 );
1181 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1182 /* Because we don't know how much was written to disk the
1183 * following functions *must* be called together!
1185 adjust_diskspace(dp, DONE);
1186 delete_diskspace(dp);
1188 dp->host->inprogress -= 1;
1192 /* no need to log this, dumper will do it */
1193 /* sleep in case the dumper failed because of a temporary network
1194 problem, as NIS or NFS... */
1198 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1199 assert( h && activehd >= 0 );
1200 h[activehd]->used -= atoi(result_argv[3]);
1201 h[activehd]->reserved -= atoi(result_argv[3]);
1202 holdalloc(h[activehd]->disk)->allocated_space -= atoi(result_argv[3]);
1203 h[activehd]->disk->disksize -= atoi(result_argv[3]);
1206 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1207 assert( h && activehd >= 0 );
1208 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1209 h[activehd]->used = h[activehd]->reserved;
1210 if( h[++activehd] ) { /* There's still some allocated space left. Tell
1211 * the dumper about it. */
1212 sched(dp)->activehd++;
1213 dumper_cmd( dumper, CONTINUE, dp );
1214 } else { /* !h[++activehd] - must allocate more space */
1215 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1216 sched(dp)->est_size = sched(dp)->act_size * 21 / 20; /* +5% */
1217 sched(dp)->est_size = am_round(sched(dp)->est_size, DISK_BLOCK_KB);
1218 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1222 /* cur_idle = max(cur_idle, IDLE_NO_DISKSPACE); */
1223 /* No diskspace available. The reason for this will be
1224 * determined in continue_dumps(). */
1225 enqueue_disk( &roomq, dp );
1228 /* OK, allocate space for disk and have dumper continue */
1229 sched(dp)->activehd = assign_holdingdisk( h, dp );
1230 dumper_cmd( dumper, CONTINUE, dp );
1236 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1237 assert(pending_aborts);
1238 free_serial(result_argv[2]);
1240 rename_tmp_holding(sched(dp)->destname, 0);
1241 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1242 /* Because we don't know how much was written to disk the
1243 * following functions *must* be called together!
1245 adjust_diskspace(dp, DONE);
1246 delete_diskspace(dp);
1247 sched(dp)->attempted++;
1248 enqueue_disk(&runq, dp); /* we'll try again later */
1250 dp->host->inprogress -= 1;
1258 /* either EOF or garbage from dumper. Turn it off */
1259 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1260 dumper->name, (long)dumper->pid);
1261 FD_CLR(fd,&readset);
1264 dumper->down = 1; /* mark it down so it isn't used again */
1266 /* if it was dumping something, zap it and try again */
1267 rename_tmp_holding(sched(dp)->destname, 0);
1268 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1269 assert( h && activehd >= 0 );
1270 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1271 /* Because we don't know how much was written to disk the
1272 * following functions *must* be called together!
1274 adjust_diskspace(dp, DONE);
1275 delete_diskspace(dp);
1276 dp->host->inprogress -= 1;
1278 if(sched(dp)->attempted) {
1279 log_add(L_FAIL, "%s %s %d %s [%s died]",
1280 dp->host->hostname, dp->name,
1281 sched(dp)->level, sched(dp)->datestamp, dumper->name);
1284 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1285 dumper->name, dp->host->hostname, dp->name,
1287 sched(dp)->attempted++;
1288 enqueue_disk(&runq, dp);
/*
 * read_flush: read flush requests from stdin, one line at a time, and
 * queue an entry on tapeqp for each holding file that passes validation.
 *
 * Every line is tokenised in this order: command, hostname, diskname,
 * datestamp, level number, filename.  "ENDFLUSH" is recognised specially
 * (presumably it ends the loop -- confirm); any other command than
 * "FLUSH" is reported through error().
 *
 * The holding file header is read with get_dumpfile().  A file that is
 * neither F_DUMPFILE nor F_CONT_DUMPFILE is logged as cruft.  A header
 * whose host, disk or datestamp differs from the request, a disk that
 * lookup_disk() does not know, and a dump level outside 0..9 are each
 * logged with log_add(L_INFO, ...).
 *
 * For an accepted file a new disk_t is linked onto the flushhost list
 * (flushhost gets the hostname "FLUSHHOST" when it is allocated here) and
 * a sched_t describing the holding file is hung off dp1->up.  The running
 * total of act_size is printed as "driver: flush size".
 */
1303 void read_flush(tapeqp)
1310 char *hostname, *diskname, *datestamp;
1314 char *inpline = NULL;
1318 long flush_size = 0;
1320 /* read schedule from stdin */
1322 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1328 skip_whitespace(s, ch); /* find the command */
1330 error("Aflush line %d: syntax error", line);
1334 skip_non_whitespace(s, ch);
1337 if(strcmp(command,"ENDFLUSH") == 0) {
1341 if(strcmp(command,"FLUSH") != 0) {
1342 error("Bflush line %d: syntax error", line);
1346 skip_whitespace(s, ch); /* find the hostname */
1348 error("Cflush line %d: syntax error", line);
1352 skip_non_whitespace(s, ch);
1355 skip_whitespace(s, ch); /* find the diskname */
1357 error("Cflush line %d: syntax error", line);
1361 skip_non_whitespace(s, ch);
1364 skip_whitespace(s, ch); /* find the datestamp */
1366 error("Cflush line %d: syntax error", line);
1370 skip_non_whitespace(s, ch);
1373 skip_whitespace(s, ch); /* find the level number */
1374 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1375 error("Cflush line %d: syntax error", line);
1378 skip_integer(s, ch);
1380 skip_whitespace(s, ch); /* find the filename */
1382 error("Cflush line %d: syntax error", line);
1386 skip_non_whitespace(s, ch);
/* validate the request against what the holding file header says */
1389 get_dumpfile(destname, &file);
1390 if( file.type != F_DUMPFILE) {
1391 if( file.type != F_CONT_DUMPFILE )
1392 log_add(L_INFO, "%s: ignoring cruft file.", destname);
1396 if(strcmp(hostname, file.name) != 0 ||
1397 strcmp(diskname, file.disk) != 0 ||
1398 strcmp(datestamp, file.datestamp) != 0) {
1399 log_add(L_INFO, "disk %s:%s not consistent with file %s",
1400 hostname, diskname, destname);
1404 dp = lookup_disk(file.name, file.disk);
1407 log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1408 destname, file.name, file.disk);
1412 if(file.dumplevel < 0 || file.dumplevel > 9) {
1413 log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1414 destname, file.dumplevel);
1418 dp1 = (disk_t *)alloc(sizeof(disk_t));
1420 dp1->next = dp1->prev = NULL;
1422 /* add it to the flushhost list */
1424 flushhost = alloc(sizeof(host_t));
1425 flushhost->next = NULL;
1426 flushhost->hostname = stralloc("FLUSHHOST");
1427 flushhost->up = NULL;
1428 flushhost->features = NULL;
1430 dp1->hostnext = flushhost->disks;
1431 flushhost->disks = dp1;
1433 sp = (sched_t *) alloc(sizeof(sched_t));
1434 sp->destname = stralloc(destname);
1435 sp->level = file.dumplevel;
1436 sp->dumpdate = NULL;
1437 sp->degr_dumpdate = NULL;
1438 sp->datestamp = stralloc(file.datestamp);
1442 sp->degr_level = -1;
1445 sp->act_size = size_holding_files(destname);
1446 /*sp->holdp = NULL; JLM: must be build*/
1447 sp->holdp = build_diskspace(destname);
/* NOTE(review): on this bail-out dp1 is already linked onto
 * flushhost->disks, and sp, sp->destname and sp->datestamp have been
 * allocated; nothing on this line releases or unlinks them -- confirm
 * and clean up. */
1448 if(sp->holdp == NULL) continue;
1450 sp->timestamp = (time_t)0;
1452 dp1->up = (char *)sp;
1454 enqueue_disk(tapeqp, dp1);
1455 flush_size += sp->act_size;
1457 printf("driver: flush size %ld\n", flush_size);
/*
 * read_schedule: read the planner schedule from stdin, one DUMP line at a
 * time, and attach a sched_t built from each line to the matching disk.
 *
 * Fields are scanned in this order: command (must be "DUMP"), host name,
 * feature list, disk name, datestamp, priority, level, dump date, size,
 * estimated time, then an optional degraded-mode group (level, dump date,
 * size, estimated time).  A malformed field is reported through error()
 * together with the line number.  A line naming a disk that lookup_disk()
 * does not know is reported as "not in disklist, ignored".
 *
 * est_size and degr_size include one extra DISK_BLOCK_KB for the header.
 * The host feature set is filled in from the line only while it is still
 * NULL.  Each scheduled disk is removed from waitqp and inserted into the
 * global runq ordered by sort_by_time.
 *
 * waitqp - queue the scheduled disks are taken from
 * runqp  - not referenced in the lines shown; the insert goes to &runq
 */
1462 void read_schedule(waitqp, runqp)
1463 disklist_t *waitqp, *runqp;
1467 int level, line, priority;
1468 char *dumpdate, *degr_dumpdate;
1470 long time, degr_time;
1471 unsigned long size, degr_size;
1472 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1477 /* read schedule from stdin */
1479 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1485 skip_whitespace(s, ch); /* find the command */
1487 error("schedule line %d: syntax error (no command)", line);
1491 skip_non_whitespace(s, ch);
1494 if(strcmp(command,"DUMP") != 0) {
1495 error("schedule line %d: syntax error (%s != DUMP)", line, command);
1499 skip_whitespace(s, ch); /* find the host name */
1501 error("schedule line %d: syntax error (no host name)", line);
1505 skip_non_whitespace(s, ch);
1508 skip_whitespace(s, ch); /* find the feature list */
1510 error("schedule line %d: syntax error (no feature list)", line);
1514 skip_non_whitespace(s, ch);
1517 skip_whitespace(s, ch); /* find the disk name */
1519 error("schedule line %d: syntax error (no disk name)", line);
1523 skip_non_whitespace(s, ch);
1526 skip_whitespace(s, ch); /* find the datestamp */
1528 error("schedule line %d: syntax error (no datestamp)", line);
1532 skip_non_whitespace(s, ch);
1535 skip_whitespace(s, ch); /* find the priority number */
1536 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
1537 error("schedule line %d: syntax error (bad priority)", line);
1540 skip_integer(s, ch);
1542 skip_whitespace(s, ch); /* find the level number */
1543 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1544 error("schedule line %d: syntax error (bad level)", line);
1547 skip_integer(s, ch);
1549 skip_whitespace(s, ch); /* find the dump date */
1551 error("schedule line %d: syntax error (bad dump date)", line);
1555 skip_non_whitespace(s, ch);
1558 skip_whitespace(s, ch); /* find the size number */
1559 if(ch == '\0' || sscanf(s - 1, "%lu", &size) != 1) {
1560 error("schedule line %d: syntax error (bad size)", line);
1563 skip_integer(s, ch);
1565 skip_whitespace(s, ch); /* find the time number */
1566 if(ch == '\0' || sscanf(s - 1, "%ld", &time) != 1) {
1567 error("schedule line %d: syntax error (bad estimated time)", line);
1570 skip_integer(s, ch);
1572 degr_dumpdate = NULL; /* flag if degr fields found */
1573 skip_whitespace(s, ch); /* find the degr level number */
1575 if(sscanf(s - 1, "%d", &degr_level) != 1) {
1576 error("schedule line %d: syntax error (bad degr level)", line);
1579 skip_integer(s, ch);
1581 skip_whitespace(s, ch); /* find the degr dump date */
1583 error("schedule line %d: syntax error (bad degr dump date)", line);
1586 degr_dumpdate = s - 1;
1587 skip_non_whitespace(s, ch);
1590 skip_whitespace(s, ch); /* find the degr size number */
1591 if(ch == '\0' || sscanf(s - 1, "%lu", &degr_size) != 1) {
1592 error("schedule line %d: syntax error (bad degr size)", line);
1595 skip_integer(s, ch);
1597 skip_whitespace(s, ch); /* find the degr time number */
/* degr_time is a long, so it is scanned with %ld like `time' above */
1598 if(ch == '\0' || sscanf(s - 1, "%ld", &degr_time) != 1) {
1599 error("schedule line %d: syntax error (bad degr estimated time)", line);
1602 skip_integer(s, ch);
1605 dp = lookup_disk(hostname, diskname);
1608 "schedule line %d: %s:%s not in disklist, ignored",
1609 line, hostname, diskname);
1613 sp = (sched_t *) alloc(sizeof(sched_t));
1615 sp->dumpdate = stralloc(dumpdate);
1616 sp->est_size = DISK_BLOCK_KB + size; /* include header */
1617 sp->est_time = time;
1618 sp->priority = priority;
1619 sp->datestamp = stralloc(datestamp);
1622 sp->degr_level = degr_level;
1623 sp->degr_dumpdate = stralloc(degr_dumpdate);
1624 sp->degr_size = DISK_BLOCK_KB + degr_size;
1625 sp->degr_time = degr_time;
1627 sp->degr_level = -1;
1628 sp->degr_dumpdate = NULL;
1634 sp->est_kps = size/time;
1636 if(sp->degr_level != -1) {
1640 sp->degr_kps = degr_size/degr_time;
1648 sp->timestamp = (time_t)0;
1649 sp->destname = NULL;
1652 dp->up = (char *) sp;
1653 if(dp->host->features == NULL) {
1654 dp->host->features = am_string_to_feature(features);
1656 remove_disk(waitqp, dp);
1657 insert_disk(&runq, dp, sort_by_time);
1661 log_add(L_WARNING, "WARNING: got empty schedule from planner");
/*
 * NOTE(review): the function header is not part of this listing; callers
 * in this file use the name free_kps -- confirm.
 *
 * With a null interface pointer the maxusage and curusage of every
 * interface returned by lookup_interface(NULL) are summed and the
 * combined headroom (maxusage - curusage) is computed; for a specific
 * interface the headroom of that interface alone is computed.
 */
1669 if (ip == (interface_t *)0) {
1673 for(p = lookup_interface(NULL); p != NULL; p = p->next) {
1674 maxusage += p->maxusage;
1675 curusage += p->curusage;
1677 res = maxusage - curusage;
1680 res = ip->maxusage - ip->curusage;
/*
 * interface_state: print "driver: interface-state time <time_str>"
 * followed by one " if <name>: free <kps>" item for every interface
 * returned by lookup_interface(NULL), using free_kps() for the figure.
 */
1686 void interface_state(time_str)
1691 printf("driver: interface-state time %s", time_str);
1693 for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
1694 printf(" if %s: free %d", ip->name, free_kps(ip));
/*
 * allocate_bandwidth: charge kps against interface ip by adding it to
 * ip->curusage.  No upper bound is checked in the lines shown.
 */
1699 void allocate_bandwidth(ip, kps)
1703 ip->curusage += kps;
/*
 * deallocate_bandwidth: give kps back to interface ip by subtracting it
 * from ip->curusage.  The assert guards against returning more than is
 * currently recorded as in use.
 */
1706 void deallocate_bandwidth(ip, kps)
1710 assert(kps <= ip->curusage);
1711 ip->curusage -= kps;
/*
 * free_space: walk the holding disks from getconf_holdingdisks() and, for
 * each one, compute disksize minus the space currently allocated on it.
 * Presumably the per-disk differences are accumulated into total_free and
 * returned -- the accumulation is not part of the lines shown; confirm.
 */
1715 unsigned long free_space()
1718 unsigned long total_free;
1722 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
1723 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
/*
 * find_diskspace: choose holding disks able to take `size' KB of dump
 * data, possibly spread over several disks.
 *
 * size     - amount wanted; rounded with am_round() to DISK_BLOCK_KB
 * cur_idle - not referenced in the lines shown
 * pref     - optional preferred assignment; its disk is considered when
 *            it has not been used in this run and still has at least
 *            DISK_BLOCK_KB free
 *
 * Other disks are considered only with at least 2*DISK_BLOCK_KB free and
 * are ranked by fewest allocated dumpers, then by most free space.  For
 * the disk picked, the space is split into data and one DISK_BLOCK_KB
 * header per chunksize; `reserved' of the new entry is the data amount
 * plus those headers, `used' starts at 0 and `destname' at NULL.
 *
 * When space is still wanted after the loop, a message is printed and
 * the partial result is released with free_assignedhd().
 */
1730 assignedhd_t **find_diskspace(size, cur_idle, pref)
1734 /* Rewrite by Peter Conrad <conrad@opus5.de>, June '99:
1735 * - enable splitting a dump across several holding disks
1736 * - allocate only as much as size tells us, dumpers may request more later
1737 * We return an array of pointers to assignedhd_t. The array contains at
1738 * most one entry per holding disk. The list of pointers is terminated by
1739 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
1740 * how much diskspace to use on that disk. Later on, assign_holdingdisk
1741 * will allocate the given amount of space.
1742 * If there is not enough room on the holdingdisks, NULL is returned.
1745 assignedhd_t **result = NULL;
1746 holdingdisk_t *minp, *hdp;
1747 int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
1750 long halloc, dalloc, hfree, dfree;
1752 size = am_round(size, DISK_BLOCK_KB);
1755 printf("find diskspace: want %lu K\n", size );
1759 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
1763 used = alloc(sizeof(char) * num_holdingdisks);/*disks used during this run*/
1764 memset( used, 0, num_holdingdisks );
1765 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
1768 while( i < num_holdingdisks && size > 0 ) {
1769 /* find the holdingdisk with the fewest active dumpers and among
1770 * those the one with the biggest free space
1772 minp = NULL; minj = -1;
1773 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
1774 if( pref && pref->disk == hdp && !used[j] &&
1775 holdalloc(hdp)->allocated_space <= hdp->disksize - DISK_BLOCK_KB) {
1780 else if( holdalloc(hdp)->allocated_space <= hdp->disksize - 2*DISK_BLOCK_KB &&
1783 holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
1784 (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
1785 hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
1791 if( !minp ) { break; } /* all holding disks are full */
1794 /* hfree = free space on the disk */
1795 hfree = minp->disksize - holdalloc(minp)->allocated_space;
1797 /* dfree = free space for data, remove 1 header for each chunksize */
1798 dfree = hfree - (((hfree-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
1800 /* dalloc = space I can allocate for data */
1801 dalloc = ( dfree < size ) ? dfree : size;
1803 /* halloc = space to allocate, including 1 header for each chunksize */
1804 halloc = dalloc + (((dalloc-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
1807 fprintf(stdout,"find diskspace: size %ld hf %ld df %ld da %ld ha %ld\n", size, hfree, dfree, dalloc, halloc);
1811 result[i] = alloc(sizeof(assignedhd_t));
1812 result[i]->disk = minp;
1813 result[i]->reserved = halloc;
1814 result[i]->used = 0;
1815 result[i]->destname = NULL;
1818 } /* while i < num_holdingdisks && size > 0 */
1821 if( size ) { /* not enough space available */
1823 printf("find diskspace: not enough diskspace. Left with %lu K\n", size);
1826 free_assignedhd(result);
1831 for( i = 0; result && result[i]; i++ ) {
1832 printf("find diskspace: selected %s free %ld reserved %ld dumpers %d\n",
1833 result[i]->disk->diskdir,
1834 result[i]->disk->disksize - holdalloc(result[i]->disk)->allocated_space,
1835 result[i]->reserved,
1836 holdalloc(result[i]->disk)->allocated_dumpers);
/*
 * assign_holdingdisk: hand the holding-disk assignments in holdp over to
 * the schedule entry of diskp and account for the space on each disk.
 *
 * holdp - NULL-terminated array of assignedhd_t pointers; entries taken
 *         over are set to NULL in the array so the caller does not free
 *         them
 * diskp - disk whose sched(diskp)->holdp list is extended
 *
 * The existing sched(diskp)->holdp array is copied into a larger one.
 * When the disk already has assignments and the last of them is on the
 * same holding disk as holdp[0], the reservation of holdp[0] is added to
 * that last entry.  Every remaining entry gets a destname built from the
 * holding directory, host name and further parts, is appended to the
 * list, and its reservation is added to the disk's allocated_space.
 * Finally sched(diskp)->destname is set to the destname of the first
 * list entry.
 */
1844 int assign_holdingdisk(holdp, diskp)
1845 assignedhd_t **holdp;
1848 /* Modified by Peter Conrad <conrad@opus5.de>, June '99
1849 * Modifications for splitting dumps across holding disks:
1850 * sched(diskp)->holdp now contains an array of pointers to assignedhd_t.
1854 char *sfn = sanitise_filename(diskp->name);
1856 assignedhd_t **new_holdp;
1858 ap_snprintf( lvl, sizeof(lvl), "%d", sched(diskp)->level );
1860 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
1863 for( c = 0; holdp[c]; c++ ); /* count number of disks */
1865 /* allocate memory for sched(diskp)->holdp */
1866 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++) {}
1867 new_holdp = (assignedhd_t **)alloc(sizeof(assignedhd_t*)*(j+c+1));
1868 if (sched(diskp)->holdp) {
1869 memcpy(new_holdp, sched(diskp)->holdp, j * sizeof(*new_holdp));
1870 amfree(sched(diskp)->holdp);
1872 sched(diskp)->holdp = new_holdp;
1876 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
1877 * merge assignedhd_t's */
1879 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
1880 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
1881 holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
1882 size = (holdp[0]->reserved>size) ? 0 : size-holdp[0]->reserved;
1884 printf("merging holding disk %s to disk %s:%s, add %lu for reserved %lu, left %lu\n",
1885 sched(diskp)->holdp[j-1]->disk->diskdir,
1886 diskp->host->hostname, diskp->name,
1887 holdp[0]->reserved, sched(diskp)->holdp[j-1]->reserved,
1897 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
1898 for( ; holdp[i]; i++ ) {
1899 holdp[i]->destname = newvstralloc( holdp[i]->destname,
1900 holdp[i]->disk->diskdir, "/",
1902 diskp->host->hostname, ".",
1905 sched(diskp)->holdp[j++] = holdp[i];
1906 holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
1907 size = (holdp[i]->reserved>size) ? 0 : size-holdp[i]->reserved;
1909 printf("assigning holding disk %s to disk %s:%s, reserved %lu, left %lu\n",
1910 holdp[i]->disk->diskdir, diskp->host->hostname, diskp->name,
1911 holdp[i]->reserved, size );
1914 holdp[i] = NULL; /* so it doesn't get free()d... */
1916 sched(diskp)->holdp[j] = NULL;
1917 sched(diskp)->destname = newstralloc(sched(diskp)->destname,sched(diskp)->holdp[0]->destname);
/*
 * adjust_diskspace: reconcile the reservation of every holding disk
 * assigned to diskp with the amount recorded as used.
 *
 * diskp - disk whose sched(diskp)->holdp list is walked
 * cmd   - not referenced in the lines shown
 *
 * For each assignment diff = used - reserved is added both to the
 * holding disk's allocated_space and to the assignment's reserved field,
 * which brings reserved in line with used.  The sum of the used fields
 * becomes sched(diskp)->act_size.
 */
1923 static void adjust_diskspace(diskp, cmd)
1927 /* Re-write by Peter Conrad <conrad@opus5.de>, March '99
1928 * Modifications for splitting dumps across holding disks:
1929 * Dumpers no longer write more than they've allocated, therefore an
1930 * adjustment may only free some allocated space.
1931 * 08/99: Jean-Louis suggested that dumpers tell us how much they've written.
1932 * We just believe them and don't stat all the files but rely on the used
1936 assignedhd_t **holdp;
1937 unsigned long total=0;
1942 printf("adjust: %s:%s %s\n", diskp->host->hostname, diskp->name,
1943 sched(diskp)->destname );
1947 holdp = sched(diskp)->holdp;
1951 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
1952 diff = holdp[i]->used - holdp[i]->reserved;
1953 total += holdp[i]->used;
1954 holdalloc(holdp[i]->disk)->allocated_space += diff;
1956 printf("adjust: hdisk %s done, reserved %ld used %ld diff %ld alloc %ld dumpers %d\n",
1957 holdp[i]->disk->name, holdp[i]->reserved, holdp[i]->used, diff,
1958 holdalloc(holdp[i]->disk)->allocated_space,
1959 holdalloc(holdp[i]->disk)->allocated_dumpers );
1962 holdp[i]->reserved += diff;
1965 sched(diskp)->act_size = total;
1967 printf("adjust: after: disk %s:%s used %ld\n", diskp->host->hostname,
1968 diskp->name, sched(diskp)->act_size );
/*
 * delete_diskspace: drop the holding-disk files and bookkeeping of diskp.
 *
 * The used amount of every assignment is subtracted from its holding
 * disk's allocated_space, the holding files are removed starting from
 * the first assignment's destname, the assignment list is released with
 * free_assignedhd(), and holdp, act_size and destname of the schedule
 * entry are reset.
 */
1973 static void delete_diskspace(diskp)
1976 /* Re-write by Peter Conrad <conrad@opus5.de>, March '99
1977 * Modifications for splitting dumps across holding disks:
1978 * After implementing Jean-Louis' suggestion (see above) this looks much
1979 * simpler... again, we rely on assignedhd_s containing correct info
1981 assignedhd_t **holdp;
1984 holdp = sched(diskp)->holdp;
1988 for( i = 0; holdp[i]; i++ ) { /* for each disk */
1989 /* find all files of this dump on that disk, and subtract their
1990 * reserved sizes from the disk's allocated space
1992 holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
1995 unlink_holding_files(holdp[0]->destname); /* no need for the entire list,
1996 because unlink_holding_files
1997 will walk through all files
1998 using cont_filename */
2000 free_assignedhd(sched(diskp)->holdp);
2001 sched(diskp)->holdp = NULL;
2002 sched(diskp)->act_size = 0;
2003 amfree(sched(diskp)->destname);
/*
 * build_diskspace: reconstruct the holding-disk assignment list of an
 * existing dump from the files found on disk.
 *
 * destname - name of the first holding file of the dump
 *
 * Starting at destname the chain of files is followed through the
 * cont_filename field of each parsed file header.  For every file the
 * directory part of its name is matched against the diskdir of the
 * configured holding disks and the file size in KB is added to that
 * disk's counter.  The result array then receives entries whose reserved
 * and used fields both hold the counted amount and whose destname is a
 * copy of destname.
 *
 * A failing stat() or open() is reported on stderr.
 */
2006 assignedhd_t **build_diskspace(destname)
2012 char buffer[DISK_BLOCK_BYTES];
2014 assignedhd_t **result;
2017 int num_holdingdisks=0;
2018 char dirname[1000], *ch;
2020 char *filename = destname;
2022 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2025 used = alloc(sizeof(int) * num_holdingdisks);
2026 for(i=0;i<num_holdingdisks;i++)
2028 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
2030 while(filename != NULL && filename[0] != '\0') {
/* NOTE(review): strncpy does not terminate dirname when filename has 999
 * or more characters; the terminating store is not part of the lines
 * shown -- confirm it exists. */
2031 strncpy(dirname, filename, 999);
2033 ch = strrchr(dirname,'/');
2035 ch = strrchr(dirname,'/');
2038 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2039 hdp = hdp->next, j++ ) {
2040 if(strcmp(dirname,hdp->diskdir)==0) {
2045 if(stat(filename, &finfo) == -1) {
2046 fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
/* file size converted from bytes to KB, rounded up */
2049 used[j] += (finfo.st_size+1023)/1024;
2050 if((fd = open(filename,O_RDONLY)) == -1) {
2051 fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2052 filename, strerror(errno));
2055 buflen = fullread(fd, buffer, sizeof(buffer));
2056 parse_file_header(buffer, &file, buflen);
/* continue with the next file of the chain */
2058 filename = file.cont_filename;
2061 for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2062 hdp = hdp->next, j++ ) {
2064 result[i] = alloc(sizeof(assignedhd_t));
2065 result[i]->disk = hdp;
2066 result[i]->reserved = used[j];
2067 result[i]->used = used[j];
2068 result[i]->destname = stralloc(destname);
/*
 * holdingdisk_state: print "driver: hdisk-state time <time_str>" followed
 * by one " hdisk <n>: free <KB> dumpers <count>" item per configured
 * holding disk, where free is disksize minus allocated_space and <n> is
 * the position of the disk in the getconf_holdingdisks() list.
 */
2079 void holdingdisk_state(time_str)
2086 printf("driver: hdisk-state time %s", time_str);
2088 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2089 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2090 printf(" hdisk %d: free %ld dumpers %d", dsk, diff,
2091 holdalloc(hdp)->allocated_dumpers);
/*
 * update_failed_dump_to_tape: record a failed direct-to-tape dump of dp.
 *
 * The schedule timestamp is saved, forced to 0 for the duration of the
 * update_info_dumper(dp, -1, -1, -1) call, and restored afterwards.
 */
2096 static void update_failed_dump_to_tape(dp)
2099 time_t save_timestamp = sched(dp)->timestamp;
2100 /* setting timestamp to 0 removes the current level from the
2101 * database, so that we ensure that it will not be bumped to the
2102 * next level on the next run. If we didn't do this, dumpdates or
2103 * gnutar-lists might have been updated already, and a bumped
2104 * incremental might be created. */
2105 sched(dp)->timestamp = 0;
2106 update_info_dumper(dp, -1, -1, -1);
2107 sched(dp)->timestamp = save_timestamp;
2110 /* ------------------- */
/*
 * dump_to_tape: dump one disk straight to tape.
 *
 * Sequence visible here:
 *   1. pick an idle dumper; with none available a warning is logged and
 *      2 is returned
 *   2. send PORT_WRITE to the taper and read its reply; when no port is
 *      obtained a message is printed and 2 is returned
 *   3. store result_argv[2] of the reply in sched(dp)->destname and send
 *      PORT_DUMP to the dumper
 *   4. mark taper and dumper busy, bump the host inprogress count, stamp
 *      the start time and allocate est_kps of bandwidth
 *   5. read the dumper result: `failed' becomes 1 for a possibly
 *      non-fatal problem and 2 for a fatal one; NO_ROOM first sends ABORT
 *      to the dumper and waits for ABORT_FINISHED
 *   6. read the taper result: DONE updates the dumper and taper info
 *      records, TRYAGAIN puts the disk back at the head of runq and
 *      resets tape_left, TAPE_ERROR starts degraded mode
 *   7. clear the busy flags, inprogress count and bandwidth allocation
 *
 * inside_dump_to_tape is 1 for the duration of the call and is cleared on
 * every exit path shown.
 */
2111 int dump_to_tape(dp)
2122 char *result_argv[MAX_ARGS+1];
2124 inside_dump_to_tape = 1; /* for simulator */
2126 printf("driver: dumping %s:%s directly to tape\n",
2127 dp->host->hostname, dp->name);
2130 /* pick a dumper and fail if there are no idle dumpers */
2132 dumper = idle_dumper();
2134 printf("driver: no idle dumpers for %s:%s.\n",
2135 dp->host->hostname, dp->name);
2137 log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2138 dp->host->hostname, dp->name);
2139 inside_dump_to_tape = 0;
2140 return 2; /* fatal problem */
2143 /* tell the taper to read from a port number of its choice */
2145 taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2146 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2148 printf("driver: did not get PORT from taper for %s:%s\n",
2149 dp->host->hostname, dp->name);
2151 inside_dump_to_tape = 0;
2152 return 2; /* fatal problem */
2154 /* copy port number */
2155 sched(dp)->destname = newvstralloc(sched(dp)->destname, result_argv[2], NULL );
2157 /* tell the dumper to dump to a port */
2159 dumper_cmd(dumper, PORT_DUMP, dp);
2160 dp->host->start_t = time(NULL) + 15;
2162 /* update statistics & print state */
2164 taper_busy = dumper->busy = 1;
2165 dp->host->inprogress += 1;
2167 sched(dp)->timestamp = time((time_t *)0);
2168 allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2173 /* wait for result from dumper */
2175 cmd = getresult(dumper->outfd, 1, &result_argc, result_argv, MAX_ARGS+1);
2178 free_serial(result_argv[2]);
2182 /* either eof or garbage from dumper */
2183 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2184 dumper->name, (long)dumper->pid);
2185 dumper->down = 1; /* mark it down so it isn't used again */
2186 failed = 1; /* dump failed, must still finish up with taper */
2189 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <err str> */
2190 /* everything went fine */
2191 origsize = (long)atof(result_argv[3]);
2192 dumpsize = (long)atof(result_argv[4]);
2193 dumptime = (long)atof(result_argv[5]);
2196 case NO_ROOM: /* NO-ROOM <handle> */
2197 dumper_cmd(dumper, ABORT, dp);
2198 cmd = getresult(dumper->outfd, 1, &result_argc, result_argv, MAX_ARGS+1);
2200 free_serial(result_argv[2]);
2201 assert(cmd == ABORT_FINISHED);
2203 case TRYAGAIN: /* TRY-AGAIN <handle> <err str> */
2205 /* dump failed, but we must still finish up with taper */
2206 failed = 1; /* problem with dump, possibly nonfatal */
2209 case FAILED: /* FAILED <handle> <errstr> */
2210 /* dump failed, but we must still finish up with taper */
2211 failed = 2; /* fatal problem with dump */
2216 * Note that at this point, even if the dump above failed, it may
2217 * not be a fatal failure if taper below says we can try again.
2218 * E.g. a dumper failure above may actually be the result of a
2219 * tape overflow, which in turn causes dump to see "broken pipe",
2220 * "no space on device", etc., since taper closed the port first.
2223 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2226 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
2227 if(result_argc != 5) {
2228 error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
2231 if(failed == 1) goto tryagain; /* dump didn't work */
2232 else if(failed == 2) goto failed_dumper;
2234 free_serial(result_argv[2]);
2236 /* every thing went fine */
2237 update_info_dumper(dp, origsize, dumpsize, dumptime);
2238 filenum = atoi(result_argv[4]);
2239 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2240 /* note that update_info_dumper() must be run before
2241 update_info_taper(), since update_info_dumper overwrites
2242 tape information. */
2246 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
2248 headqueue_disk(&runq, dp);
2250 update_failed_dump_to_tape(dp);
2251 free_serial(result_argv[2]);
2252 tape_left = tape_length;
2256 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2259 update_failed_dump_to_tape(dp);
2260 free_serial(result_argv[2]);
2261 failed = 2; /* fatal problem */
2262 start_degraded_mode(&runq);
2265 /* reset statistics & return */
2267 taper_busy = dumper->busy = 0;
2268 dp->host->inprogress -= 1;
2270 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2272 inside_dump_to_tape = 0;
/*
 * NOTE(review): the function header is not part of this listing; callers
 * in this file use the name queue_length -- confirm.
 *
 * Counts the entries of queue q by following the next pointers from
 * q.head; the loop body is empty and len holds the count afterwards.
 */
2282 for(len = 0, p = q.head; p != NULL; len++, p = p->next);
/*
 * short_dump_state: print a one-line "driver: state time ..." summary.
 *
 * The line carries the wall-clock time, free network bandwidth over all
 * interfaces, free holding space, the taper state (DOWN in degraded mode,
 * otherwise idle or writing), the number of dumpers that are not busy,
 * the lengths of tapeq, runq and roomq, the wakeup interval in seconds
 * and the current idle reason.  The interface and holding-disk state
 * lines are printed afterwards with the same time string.
 */
2287 void short_dump_state()
2292 wall_time = walltime_str(curclock());
2294 printf("driver: state time %s ", wall_time);
2295 printf("free kps: %d space: %lu taper: ",
2296 free_kps((interface_t *)0), free_space());
2297 if(degraded_mode) printf("DOWN");
2298 else if(!taper_busy) printf("idle");
2299 else printf("writing");
2301 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2302 printf(" idle-dumpers: %d", nidle);
2303 printf(" qlen tapeq: %d", queue_length(tapeq));
2304 printf(" runq: %d", queue_length(runq));
2305 printf(" roomq: %d", queue_length(roomq));
2306 printf(" wakeup: %d", (int)sleep_time.tv_sec);
2307 printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2308 interface_state(wall_time);
2309 holdingdisk_state(wall_time);
/*
 * dump_state: print a multi-line report of the driver state between two
 * "================" rules.
 *
 * str - caller-supplied text printed after the time in the heading
 *
 * The report lists free bandwidth and holding space, the taper state
 * (with the disk being written and its estimated size when busy), one
 * line per dumper slot up to inparallel (idle, or the disk being dumped
 * with its estimates), and the TAPE, ROOM and RUN queues via dump_queue().
 */
2313 void dump_state(str)
2319 printf("================\n");
2320 printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
2321 printf("free kps: %d, space: %lu\n", free_kps((interface_t *)0), free_space());
2322 if(degraded_mode) printf("taper: DOWN\n");
2323 else if(!taper_busy) printf("taper: idle\n");
2324 else printf("taper: writing %s:%s.%d est size %lu\n",
2325 taper_disk->host->hostname, taper_disk->name,
2326 sched(taper_disk)->level,
2327 sched(taper_disk)->est_size);
2328 for(i = 0; i < inparallel; i++) {
2329 dp = dmptable[i].dp;
2330 if(!dmptable[i].busy)
2331 printf("%s: idle\n", dmptable[i].name);
2333 printf("%s: dumping %s:%s.%d est kps %d size %lu time %ld\n",
2334 dmptable[i].name, dp->host->hostname, dp->name, sched(dp)->level,
2335 sched(dp)->est_kps, sched(dp)->est_size, sched(dp)->est_time);
2337 dump_queue("TAPE", tapeq, 5, stdout);
2338 dump_queue("ROOM", roomq, 5, stdout);
2339 dump_queue("RUN ", runq, 5, stdout);
2340 printf("================\n");