2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-2000 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: driver.c,v 1.58.2.31.2.8.2.20.2.16 2005/09/20 21:31:52 jrjackson Exp $
29 * controlling process for the Amanda backup system
33 * XXX possibly modify tape queue to be cognizant of how much room is left on
34 * tape. Probably not effective though, should do this in planner.
47 #include "server_util.h"
/*
 * Driver-wide state shared by the functions in this file.
 *
 * As used in the visible code:
 *   waitq, runq     - filled by read_schedule(); runq is the queue that
 *                     start_some_dumps() draws from.
 *   tapeq           - disks queued for the taper (finished dumps are
 *                     enqueued here by handle_dumper_result()).
 *   roomq           - dumps parked until continue_dumps() can find them
 *                     more holding disk space.
 *   pending_aborts  - consulted by continue_dumps() and asserted to be
 *                     non-zero when an ABORT-FINISHED reply arrives.
 *   reserved_space  - total_disksize scaled by the reserve percentage
 *                     (computed in main()).
 *   total_disksize  - running sum of the holding disk sizes.
 *   tape_length     - copied from the configured tapetype length.
 *   tape_left       - reset to tape_length and reduced as dumps are
 *                     handed to the taper.
 * NOTE(review): inside_dump_to_tape and flushhost are not referenced in
 * the part of the file visible here; their roles need confirming.
 */
49 disklist_t waitq, runq, tapeq, roomq;
50 int pending_aborts, inside_dump_to_tape;
53 unsigned long reserved_space;
54 unsigned long total_disksize;
58 unsigned long tape_length, tape_left = 0;
60 am_host_t *flushhost = NULL;
/*
 * Forward declarations for the functions defined in this file.
 * NOTE(review): P(()) wraps every parameter list; it is presumably a
 * prototype-compatibility macro supplied by a project header -- confirm.
 */
62 int client_constrained P((disk_t *dp));
63 int sort_by_priority_reversed P((disk_t *a, disk_t *b));
64 int sort_by_time P((disk_t *a, disk_t *b));
65 int start_some_dumps P((disklist_t *rq));
66 void dump_schedule P((disklist_t *qp, char *str));
67 void start_degraded_mode P((disklist_t *queuep));
68 void handle_taper_result P((void));
69 dumper_t *idle_dumper P((void));
70 int some_dumps_in_progress P((void));
71 int num_busy_dumpers P((void));
72 dumper_t *lookup_dumper P((int fd));
73 void handle_dumper_result P((int fd));
74 void read_flush P((disklist_t *tapeqp));
75 void read_schedule P((disklist_t *waitqp, disklist_t *runqp));
76 int free_kps P((interface_t *ip));
77 void interface_state P((char *time_str));
78 void allocate_bandwidth P((interface_t *ip, int kps));
79 void deallocate_bandwidth P((interface_t *ip, int kps));
80 unsigned long free_space P((void));
81 assignedhd_t **find_diskspace P((unsigned long size, int *cur_idle, assignedhd_t *preferred));
82 char *diskname2filename P((char *dname));
83 int assign_holdingdisk P((assignedhd_t **holdp, disk_t *diskp));
84 static void adjust_diskspace P((disk_t *diskp, cmd_t cmd));
85 static void delete_diskspace P((disk_t *diskp));
86 assignedhd_t **build_diskspace P((char *destname));
87 void holdingdisk_state P((char *time_str));
88 int dump_to_tape P((disk_t *dp));
89 int queue_length P((disklist_t q));
90 void short_dump_state P((void));
91 void dump_state P((char *str));
92 void startaflush P((void));
93 int main P((int main_argc, char **main_argv));
/*
 * Idle bookkeeping and the select() timeout.
 *
 * idle_reason holds one of the IDLE_ codes below.  start_some_dumps()
 * resets it to IDLE_NO_DUMPERS, switches it to NOT_IDLE once an idle
 * dumper is found, and finally raises it with max(), so a
 * higher-numbered reason overrides a lower one.
 * The IDLE_ defines are interleaved with the idle_strings[] entries;
 * presumably each code is the index of its own string (most of the
 * strings are not visible here -- confirm).
 * sleep_time is the timeout handed to select() in the main loop.  It
 * starts at SLEEP_MAX (24 hours expressed in seconds) and is shortened
 * by start_some_dumps() when a host or disk start time lies in the
 * future; any_delayed_disk is set to 1 in that same case.
 */
95 static int idle_reason;
99 char *idle_strings[] = {
102 #define IDLE_START_WAIT 1
104 #define IDLE_NO_DUMPERS 2
106 #define IDLE_NO_HOLD 3
108 #define IDLE_CLIENT_CONSTRAINED 4
109 "client-constrained",
110 #define IDLE_NO_DISKSPACE 5
112 #define IDLE_TOO_LARGE 6
114 #define IDLE_NO_BANDWIDTH 7
116 #define IDLE_TAPER_WAIT 8
120 #define SLEEP_MAX (24*3600)
121 struct timeval sleep_time = { SLEEP_MAX, 0 };
122 /* enabled if any disks are in start-wait: */
123 int any_delayed_disk = 0;
/*
 * main -- entry point of the driver, the controlling process of a
 * backup run.
 *
 * Several lines of the body (declarations, braces, some statements) are
 * not visible in this view, so the outline below is limited to what the
 * visible lines show:
 *   1. stdout and stderr are made line buffered and SIGPIPE is ignored.
 *   2. The configuration is selected: one path copies main_argv[1] into
 *      config_name, another derives config_dir and config_name from the
 *      current working directory.  main_argv[2] is compared against the
 *      prefix nodump.  The config file is then read; failure is fatal.
 *   3. The taper is started first and sent START_TAPER, then the disk
 *      list is loaded.
 *   4. Every configured holding disk gets a holdalloc record, is probed
 *      with get_fs_stats() and access(W_OK), has its size reconciled
 *      with the available space, gets a per-run directory, and is added
 *      to total_disksize.  reserved_space is then derived from the
 *      reserve percentage.
 *   5. Dumpers are started unless nodump is set.
 *   6. The first taper reply is awaited; anything other than TAPER_OK
 *      switches runq to degraded mode and drops the taper from readset.
 *   7. Until stdin (fd 0) becomes readable, taper replies are serviced;
 *      then the schedule is read into waitq and runq.
 *   8. Main loop: select() on readset with sleep_time as timeout, then
 *      each ready descriptor is dispatched to handle_taper_result() or
 *      handle_dumper_result() for as long as buffered input remains.
 *   9. Whatever is left on runq is sent through dump_to_tape() or logged
 *      as failed.
 *  10. QUIT is sent to every dumper and to the taper, children are
 *      reaped with wait() and abnormal exits are reported, holding
 *      disks are cleaned up, and the finish record is logged.
 *
 * The return statement is not visible in this view.
 */
125 int main(main_argc, main_argv)
135 generic_fs_stats_t fs;
137 unsigned long malloc_hist_1, malloc_size_1;
138 unsigned long malloc_hist_2, malloc_size_2;
139 unsigned long reserve = 100;
144 char *result_argv[MAX_ARGS+1];
152 setvbuf(stdout, (char *)NULL, _IOLBF, 0);
153 setvbuf(stderr, (char *)NULL, _IOLBF, 0);
157 signal(SIGPIPE, SIG_IGN);
159 malloc_size_1 = malloc_inuse(&malloc_hist_1);
161 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
162 set_logerror(logerror);
167 printf("%s: pid %ld executable %s version %s\n",
168 get_pname(), (long) getpid(), main_argv[0], version());
171 config_name = stralloc(main_argv[1]);
172 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
174 if(strncmp(main_argv[2], "nodump", 6) == 0) {
180 char my_cwd[STR_SIZE];
182 if (getcwd(my_cwd, sizeof(my_cwd)) == NULL) {
183 error("cannot determine current working directory");
185 config_dir = stralloc2(my_cwd, "/");
186 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
187 config_name = stralloc(config_name + 1);
193 conffile = stralloc2(config_dir, CONFFILE_NAME);
194 if(read_conffile(conffile)) {
195 error("errors processing config file \"%s\"", conffile);
200 datestamp = construct_datestamp(NULL);
201 timestamp = construct_timestamp(NULL);
202 log_add(L_START,"date %s", datestamp);
204 taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
205 dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
208 conf_taperalgo = getconf_int(CNF_TAPERALGO);
209 conf_tapetype = getconf_str(CNF_TAPETYPE);
210 tape = lookup_tapetype(conf_tapetype);
211 tape_length = tape->length;
/* NOTE(review): tape_length is declared unsigned long but is printed with %ld. */
212 printf("driver: tape size %ld\n", tape_length);
214 /* taper takes a while to get going, so start it up right away */
217 startup_tape_process(taper_program);
218 taper_cmd(START_TAPER, datestamp, NULL, 0, NULL);
220 /* start initializing: read in databases */
222 conf_diskfile = getconf_str(CNF_DISKFILE);
223 if (*conf_diskfile == '/') {
224 conf_diskfile = stralloc(conf_diskfile);
226 conf_diskfile = stralloc2(config_dir, conf_diskfile);
228 if((origqp = read_diskfile(conf_diskfile)) == NULL) {
229 error("could not load disklist \"%s\"", conf_diskfile);
231 amfree(conf_diskfile);
233 /* set up any configuration-dependent variables */
235 inparallel = getconf_int(CNF_INPARALLEL);
237 reserve = getconf_int(CNF_RESERVE);
240 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
241 hdp->up = (void *)alloc(sizeof(holdalloc_t));
242 holdalloc(hdp)->allocated_dumpers = 0;
243 holdalloc(hdp)->allocated_space = 0L;
245 if(get_fs_stats(hdp->diskdir, &fs) == -1
246 || access(hdp->diskdir, W_OK) == -1) {
247 log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
248 hdp->diskdir, strerror(errno));
254 if(hdp->disksize > 0) {
255 if(hdp->disksize > fs.avail) {
257 "WARNING: %s: %ld KB requested, but only %ld KB available.",
258 hdp->diskdir, hdp->disksize, fs.avail);
259 hdp->disksize = fs.avail;
262 else if(fs.avail + hdp->disksize < 0) {
264 "WARNING: %s: not %ld KB free.",
265 hdp->diskdir, -hdp->disksize);
270 hdp->disksize += fs.avail;
273 printf("driver: adding holding disk %d dir %s size %ld chunksize %ld\n",
274 dsk, hdp->diskdir, hdp->disksize, hdp->chunksize);
276 newdir = newvstralloc(newdir,
277 hdp->diskdir, "/", timestamp,
279 if(!mkholdingdir(newdir)) {
282 total_disksize += hdp->disksize;
285 reserved_space = total_disksize * (reserve / 100.0);
/* NOTE(review): reserved_space and free_space() are unsigned long but use %ld. */
287 printf("reserving %ld out of %ld for degraded-mode dumps\n",
288 reserved_space, free_space());
292 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
294 /* fire up the dumpers now while we are waiting */
296 if(!nodump) startup_dump_processes(dumper_program, inparallel);
299 * Read schedule from stdin. Usually, this is a pipe from planner,
300 * so the effect is that we wait here for the planner to
301 * finish, but meanwhile the taper is rewinding the tape, reading
302 * the label, checking it, writing a new label and all that jazz
303 * in parallel with the planner.
307 tapeq.head = tapeq.tail = NULL;
308 roomq.head = roomq.tail = NULL;
309 runq.head = runq.tail = NULL;
313 log_add(L_STATS, "startup time %s", walltime_str(curclock()));
315 printf("driver: start time %s inparallel %d bandwidth %d diskspace %lu",
316 walltime_str(curclock()), inparallel, free_kps((interface_t *)0),
318 printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
319 "OBSOLETE", datestamp, taperalgo2str(conf_taperalgo),
320 getconf_str(CNF_DUMPORDER));
323 /* Let's see if the tape is ready */
325 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
327 if(cmd != TAPER_OK) {
328 /* no tape, go into degraded mode: dump to holding disk */
329 start_degraded_mode(&runq);
330 FD_CLR(taper,&readset);
333 short_dump_state(); /* for amstatus */
335 tape_left = tape_length;
339 /* Start autoflush while waiting for dump schedule */
341 /* Start any autoflush tape writes */
344 short_dump_state(); /* for amstatus */
346 /* Process taper results until the schedule arrives */
349 FD_SET(0, &selectset);
350 FD_SET(taper, &selectset);
352 if(select(taper+1, (SELECT_ARG_TYPE *)(&selectset), NULL, NULL,
354 error("select: %s", strerror(errno));
355 if (FD_ISSET(0, &selectset)) break; /* schedule arrived */
356 if (FD_ISSET(taper, &selectset)) handle_taper_result();
357 short_dump_state(); /* for amstatus */
362 /* Read the dump schedule */
363 read_schedule(&waitq, &runq);
366 /* Start any needed flushes */
369 while(start_some_dumps(&runq) || some_dumps_in_progress() ||
373 /* wait for results */
375 memcpy(&selectset, &readset, sizeof(fd_set));
376 if(select(maxfd+1, (SELECT_ARG_TYPE *)(&selectset),
377 NULL, NULL, &sleep_time) == -1)
378 error("select: %s", strerror(errno));
380 /* handle any results that have come in */
382 for(fd = 0; fd <= maxfd; fd++) {
384 * The first pass through the following loop, we have
385 * data ready for areads (called by getresult, called by
386 * handle_.*_result). But that may read more than one record,
387 * so we need to keep processing as long as areads has data.
388 * We will get control back after each record and the buffer
389 * will go empty (indicated by areads_dataready(fd) == 0)
390 * after the last one available has been processed.
392 while(FD_ISSET(fd, &selectset) || areads_dataready(fd) > 0) {
393 if(fd == taper) handle_taper_result();
394 else handle_dumper_result(fd);
395 FD_CLR(fd, &selectset);
401 /* handle any remaining dumps by dumping directly to tape, if possible */
403 while(!empty(runq)) {
404 diskp = dequeue_disk(&runq);
406 int rc = dump_to_tape(diskp);
409 "%s %s %d [dump to tape failed, will try again]",
410 diskp->host->hostname,
412 sched(diskp)->level);
414 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
415 diskp->host->hostname,
417 sched(diskp)->datestamp,
418 sched(diskp)->level);
421 log_add(L_FAIL, "%s %s %s %d [%s]",
422 diskp->host->hostname, diskp->name,
423 sched(diskp)->datestamp, sched(diskp)->level,
425 "can't dump no-hold disk in degraded mode" :
426 "no more holding disk space");
429 short_dump_state(); /* for amstatus */
431 printf("driver: QUITTING time %s telling children to quit\n",
432 walltime_str(curclock()));
436 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
437 dumper_cmd(dumper, QUIT, NULL);
442 taper_cmd(QUIT, NULL, NULL, 0, NULL);
445 /* wait for all to die */
448 char number[NUM_STR_SIZE];
454 if((pid = wait(&retstat)) == -1) {
455 if(errno == EINTR) continue;
459 if(! WIFEXITED(retstat)) {
461 code = WTERMSIG(retstat);
462 } else if(WEXITSTATUS(retstat) != 0) {
464 code = WEXITSTATUS(retstat);
467 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
468 if(pid == dumper->pid) {
469 who = stralloc(dumper->name);
473 if(who == NULL && pid == taper_pid) {
474 who = stralloc("taper");
476 if(what != NULL && who == NULL) {
477 ap_snprintf(number, sizeof(number), "%ld", (long)pid);
478 who = stralloc2("unknown pid ", number);
481 log_add(L_WARNING, "%s exited with %s %d\n", who, what, code);
482 printf("driver: %s exited with %s %d\n", who, what, code);
487 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
488 amfree(dumper->name);
491 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
492 cleanup_holdingdisk(hdp->diskdir, 0);
497 printf("driver: FINISHED time %s\n", walltime_str(curclock()));
499 log_add(L_FINISH,"date %s time %s", datestamp, walltime_str(curclock()));
503 amfree(dumper_program);
504 amfree(taper_program);
508 malloc_size_2 = malloc_inuse(&malloc_hist_2);
510 if(malloc_size_1 != malloc_size_2) {
511 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
/*
 * Body of the flush starter.  The function header is not visible in
 * this view; going by the prototype list and the log text below this
 * is presumably startaflush() -- confirm.
 *
 * When the driver is not in degraded mode, taper_busy is clear and
 * tapeq is not empty, one disk is chosen from tapeq according to
 * conf_taperalgo and handed to the taper with FILE_WRITE.
 *
 * Visible selection rules.  Every scanning rule only accepts entries
 * whose datestamp does not sort after the datestamp of the queue head:
 *   - one variant simply dequeues the head;
 *   - one accepts an entry whose act_size is no larger than tape_left;
 *   - one keeps the entry with the largest act_size;
 *   - ALGO_LARGESTFIT keeps the largest entry that still fits in
 *     tape_left;
 *   - if nothing was chosen, the entry with the smallest act_size is
 *     used, and a message is printed unless ALGO_SMALLEST was configured.
 * After the command is sent, tape_left is reduced by act_size when the
 * dump fits; the alternative branch is not visible here.
 */
522 if(!degraded_mode && !taper_busy && !empty(tapeq)) {
523 datestamp = sched(tapeq.head)->datestamp;
524 switch(conf_taperalgo) {
526 dp = dequeue_disk(&tapeq);
530 while (fit != NULL) {
531 if(sched(fit)->act_size <= tape_left &&
532 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
540 if(dp) remove_disk(&tapeq, dp);
543 fit = dp = tapeq.head;
544 while (fit != NULL) {
545 if(sched(fit)->act_size > sched(dp)->act_size &&
546 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
551 if(dp) remove_disk(&tapeq, dp);
553 case ALGO_LARGESTFIT:
555 while (fit != NULL) {
556 if(sched(fit)->act_size <= tape_left &&
557 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
558 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
563 if(dp) remove_disk(&tapeq, dp);
569 remove_disk(&tapeq, dp);
572 if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
573 if(conf_taperalgo != ALGO_SMALLEST) {
575 "driver: startaflush: Using SMALLEST because nothing fit\n");
577 fit = dp = tapeq.head;
578 while (fit != NULL) {
579 if(sched(fit)->act_size < sched(dp)->act_size &&
580 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
585 if(dp) remove_disk(&tapeq, dp);
589 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
590 sched(dp)->datestamp);
591 fprintf(stderr,"driver: startaflush: %s %s %s %ld %ld\n",
592 taperalgo2str(conf_taperalgo), dp->host->hostname,
593 dp->name, sched(taper_disk)->act_size, tape_left);
594 if(sched(dp)->act_size <= tape_left)
595 tape_left -= sched(dp)->act_size;
/*
 * client_constrained -- check whether the client side prevents disk dp
 * from being dumped right now.
 *
 * Two conditions are tested: the host already runs at least maxdumps
 * dumps, or another disk of the same host is in progress on the same
 * spindle as dp (a spindle of -1 is exempt from the second test).
 * The returned values are not visible in this view; the caller treats
 * a non-zero result as constrained.
 */
602 int client_constrained(dp)
607 /* first, check if host is too busy */
609 if(dp->host->inprogress >= dp->host->maxdumps) {
613 /* next, check conflict with other dumps on same spindle */
615 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
619 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
620 if(dp2->inprogress && dp2->spindle == dp->spindle) {
/*
 * start_some_dumps -- pair idle dumpers with disks waiting on rq.
 *
 * On entry idle_reason, sleep_time and any_delayed_disk are reset.
 * For every dumper that is neither busy nor down, candidate disks are
 * screened in this order, and the first failing test records a reason
 * in cur_idle via max():
 *   host start time in the future, disk start time in the future
 *   (both also shorten sleep_time and set any_delayed_disk),
 *   not enough free bandwidth on the host interface, the no_space flag,
 *   no holding space from find_diskspace(), the no_hold flag, and
 *   client_constrained().
 * A disk passing every test is compared with the current best candidate
 * using the dumporder character at this dumper's index: s/S smaller or
 * larger est_size, t/T est_time, b/B est_kps; an unknown character is
 * logged and treated as s.  The visible priority test at original line
 * 745 also takes part in that decision.
 * The accepted disk gets bandwidth and a holding disk assigned, is
 * marked in progress, linked to the dumper, removed from rq and sent to
 * the dumper with FILE_DUMP; its host start time is pushed 15 seconds
 * into the future.
 *
 * The return statements are not visible in this view; main() uses the
 * result as a loop condition.
 *
 * NOTE(review): holding space is reserved by find_diskspace() before
 * the no_hold and client_constrained() checks and then freed again when
 * either check rejects the disk.
 */
627 int start_some_dumps(rq)
631 disk_t *diskp, *diskp_accept;
633 assignedhd_t **holdp=NULL, **holdp_accept;
634 time_t now = time(NULL);
637 idle_reason = IDLE_NO_DUMPERS;
638 sleep_time.tv_sec = SLEEP_MAX;
639 sleep_time.tv_usec = 0;
640 any_delayed_disk = 0;
642 if(rq->head == NULL) {
648 * A potential problem with starting from the bottom of the dump time
649 * distribution is that a slave host will have both one of the shortest
650 * and one of the longest disks, so starting its shortest disk first will
651 * tie up the host and eliminate its longest disk from consideration the
652 * first pass through. This could cause a big delay in starting that long
653 * disk, which could drag out the whole night's dumps.
655 * While starting from the top of the dump time distribution solves the
656 * above problem, this turns out to be a bad idea, because the big dumps
657 * will almost certainly pack the holding disk completely, leaving no
658 * room for even one small dump to start. This ends up shutting out the
659 * small-end dumpers completely (they stay idle).
661 * The introduction of multiple simultaneous dumps to one host alleviates
662 * the biggest&smallest dumps problem: both can be started at the
665 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
666 if(dumper->busy || dumper->down) continue;
667 /* found an idle dumper, now find a disk for it */
672 if(idle_reason == IDLE_NO_DUMPERS)
673 idle_reason = NOT_IDLE;
678 assert(diskp->host != NULL && sched(diskp) != NULL);
680 /* round estimate to next multiple of DISK_BLOCK_KB */
681 sched(diskp)->est_size = am_round(sched(diskp)->est_size,
684 if(diskp->host->start_t > now) {
685 cur_idle = max(cur_idle, IDLE_START_WAIT);
686 sleep_time.tv_sec = min(diskp->host->start_t - now,
688 any_delayed_disk = 1;
690 else if(diskp->start_t > now) {
691 cur_idle = max(cur_idle, IDLE_START_WAIT);
692 sleep_time.tv_sec = min(diskp->start_t - now,
694 any_delayed_disk = 1;
696 else if(diskp->host->netif->curusage > 0 &&
697 sched(diskp)->est_kps > free_kps(diskp->host->netif))
698 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
699 else if(sched(diskp)->no_space)
700 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
701 else if((holdp = find_diskspace(sched(diskp)->est_size,&cur_idle,NULL)) == NULL)
702 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
703 else if(diskp->no_hold) {
704 free_assignedhd(holdp);
705 cur_idle = max(cur_idle, IDLE_NO_HOLD);
706 } else if(client_constrained(diskp)) {
707 free_assignedhd(holdp);
708 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
711 /* disk fits, dump it */
712 int accept = !diskp_accept;
715 char *dumporder = getconf_str(CNF_DUMPORDER);
716 if(strlen(dumporder) <= (dumper-dmptable)) {
717 if(dumper-dmptable < 3)
723 dumptype = dumporder[dumper-dmptable];
726 case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
728 case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
730 case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
732 case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
734 case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
736 case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
738 default: log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
740 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
745 if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
746 if(holdp_accept) free_assignedhd(holdp_accept);
747 diskp_accept = diskp;
748 holdp_accept = holdp;
751 free_assignedhd(holdp);
755 free_assignedhd(holdp);
761 diskp = diskp_accept;
762 holdp = holdp_accept;
765 sched(diskp)->act_size = 0;
766 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
767 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
769 diskp->host->inprogress += 1; /* host is now busy */
770 diskp->inprogress = 1;
771 sched(diskp)->dumper = dumper;
772 sched(diskp)->timestamp = time((time_t *)0);
774 dumper->busy = 1; /* dumper is now busy */
775 dumper->dp = diskp; /* link disk to dumper */
777 remove_disk(rq, diskp); /* take it off the run queue */
778 dumper_cmd(dumper, FILE_DUMP, diskp);
779 diskp->host->start_t = time(NULL) + 15;
781 idle_reason = max(idle_reason, cur_idle);
/*
 * sort_by_priority_reversed -- queue ordering callback.
 *
 * When the scheduled priorities of a and b differ, the result is
 * priority(b) - priority(a); equal priorities defer to sort_by_time().
 * NOTE(review): this presumably places higher priorities first,
 * assuming insert_disk() sorts ascending by the callback result --
 * confirm.
 */
786 int sort_by_priority_reversed(a, b)
789 if(sched(b)->priority - sched(a)->priority != 0)
790 return sched(b)->priority - sched(a)->priority;
792 return sort_by_time(a, b);
/*
 * sort_by_time -- queue ordering callback comparing the estimated dump
 * times of a and b.
 *
 * diff is est_time(a) - est_time(b); the negative and positive cases
 * are handled separately.  The values returned for each case are not
 * visible in this view.
 */
795 int sort_by_time(a, b)
800 if ((diff = sched(a)->est_time - sched(b)->est_time) < 0) {
802 } else if (diff > 0) {
/*
 * dump_schedule -- print the contents of queue qp to stdout.
 *
 * str is appended to the heading line.  One line is printed per disk
 * with host name, disk name, level, estimated time, estimated size and
 * priority, followed by a closing rule.
 */
809 void dump_schedule(qp, str)
815 printf("dump of driver schedule %s:\n--------\n", str);
817 for(dp = qp->head; dp != NULL; dp = dp->next) {
818 printf(" %-20s %-25s lv %d t %5ld s %8lu p %d\n",
819 dp->host->hostname, dp->name, sched(dp)->level,
820 sched(dp)->est_time, sched(dp)->est_size, sched(dp)->priority);
822 printf("--------\n");
/*
 * start_degraded_mode -- rework the queue for running without a tape.
 *
 * Every disk is dequeued from queuep and handled as follows:
 *   - a dump whose level is not 0 is kept unchanged;
 *   - a level 0 dump is kept when reserved_space + est_full_size + its
 *     est_size passes a size test (the right-hand side of that
 *     comparison is not visible here), and est_full_size grows by its
 *     est_size;
 *   - otherwise, if degr_level is not -1, the dump is rewritten to its
 *     degraded level, dumpdate, size, time and kps and kept;
 *   - otherwise it is logged as failed.
 * Kept disks are inserted into a new queue ordered by
 * sort_by_priority_reversed().  The schedule is printed before and
 * after.
 *
 * NOTE(review): no initialisation of est_full_size is visible before it
 * is read -- confirm it is set to zero in the lines not shown.
 */
826 void start_degraded_mode(queuep)
831 unsigned long est_full_size;
833 newq.head = newq.tail = 0;
835 dump_schedule(queuep, "before start degraded mode");
838 while(!empty(*queuep)) {
839 dp = dequeue_disk(queuep);
841 if(sched(dp)->level != 0)
842 /* go ahead and do the disk as-is */
843 insert_disk(&newq, dp, sort_by_priority_reversed);
845 if (reserved_space + est_full_size + sched(dp)->est_size
847 insert_disk(&newq, dp, sort_by_priority_reversed);
848 est_full_size += sched(dp)->est_size;
850 else if(sched(dp)->degr_level != -1) {
851 sched(dp)->level = sched(dp)->degr_level;
852 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
853 sched(dp)->est_size = sched(dp)->degr_size;
854 sched(dp)->est_time = sched(dp)->degr_time;
855 sched(dp)->est_kps = sched(dp)->degr_kps;
856 insert_disk(&newq, dp, sort_by_priority_reversed);
859 log_add(L_FAIL, "%s %s %s %d [can't switch to incremental dump]",
860 dp->host->hostname, dp->name,
861 sched(dp)->datestamp, sched(dp)->level);
869 dump_schedule(queuep, "after start degraded mode");
/*
 * continue_dumps -- try to restart dumps that are parked on roomq
 * waiting for holding disk space, and break a deadlock if there is one.
 *
 * First pass: for each disk on roomq the last holding disk it used is
 * located and find_diskspace() is asked for est_size - act_size more,
 * preferring that disk.  Where space is granted (the test itself is
 * not visible here) the owning dumper is looked up, the space is
 * assigned, CONTINUE is sent and the disk leaves roomq.
 *
 * Second pass: the dumpers are scanned; dumps still on roomq are
 * compared by est_size so that dp ends up as the smallest waiting dump.
 * If no dumper is active, at least one is busy, there are no pending
 * aborts, and either the driver is in degraded mode or the taper is not
 * busy with an empty tapeq, then nothing will free space by itself:
 * with a single busy dumper the dump is flagged no_space, and dp is
 * removed from roomq and its dumper is sent ABORT.
 */
872 void continue_dumps()
876 int active_dumpers=0, busy_dumpers=0, i;
879 /* First we try to grant diskspace to some dumps waiting for it. */
880 for( dp = roomq.head; dp; dp = ndp ) {
882 /* find last holdingdisk used by this dump */
883 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ );
884 /* find more space */
885 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size, &active_dumpers, h[i] );
887 for(dumper = dmptable; dumper < dmptable + inparallel &&
888 dumper->dp != dp; dumper++);
889 assert( dumper < dmptable + inparallel );
890 sched(dp)->activehd = assign_holdingdisk( h, dp );
891 dumper_cmd( dumper, CONTINUE, dp );
893 remove_disk( &roomq, dp );
897 /* So for some disks there is less holding diskspace available than
898 * was asked for. Possible reasons are
899 * a) diskspace has been allocated for other dumps which are
900 * still running or already being written to tape
901 * b) all other dumps have been suspended due to lack of diskspace
902 * c) this dump doesn't fit on all the holding disks
903 * Case a) is not a problem. We just wait for the diskspace to
904 * be freed by moving the current disk to a queue.
905 * If case b) occurs, we have a deadlock situation. We select
906 * a dump from the queue to be aborted and abort it. It will
907 * be retried later dumping to disk.
908 * If case c) is detected, the dump is aborted. Next time
909 * it will be dumped directly to tape. Actually, case c is a special
910 * manifestation of case b) where only one dumper is busy.
912 for( dp=NULL, dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
915 if( !find_disk(&roomq, dumper->dp) ) {
917 } else if( !dp || sched(dp)->est_size > sched(dumper->dp)->est_size ) {
922 if( !active_dumpers && busy_dumpers > 0 &&
923 ((!taper_busy && empty(tapeq)) || degraded_mode) &&
924 pending_aborts == 0 ) { /* not case a */
925 if( busy_dumpers == 1 ) { /* case c */
926 sched(dp)->no_space = 1;
929 /* At this time, dp points to the dump with the smallest est_size.
930 * We abort that dump, hopefully not wasting too much time retrying it.
932 remove_disk( &roomq, dp );
933 dumper_cmd( sched(dp)->dumper, ABORT, NULL );
/*
 * handle_taper_result -- read one reply from the taper and act on it.
 *
 * Visible handling, by reply:
 *   DONE        requires exactly five fields.  The disk is looked up
 *               from its serial, the tape label and file number are
 *               recorded with update_info_taper(), its holding space is
 *               deleted, the schedule strings are freed and
 *               continue_dumps() is called.
 *   TRYAGAIN    requires at least two fields.  A disk that was already
 *               attempted is logged as failed; otherwise it is counted
 *               as attempted and put back at the head of tapeq.
 *               tape_left is reset to tape_length and continue_dumps()
 *               is called.
 *   TAPE_ERROR  the disk is looked up and reported, then control falls
 *               through into the shared error path: start_degraded_mode()
 *               on runq, tapeq is emptied and the taper descriptor is
 *               removed from readset (and closed unless the reply was
 *               TAPE_ERROR).
 *   other       fatal: unexpected token.
 *
 * NOTE(review): the DONE error text opens a bracket that is never
 * closed, unlike the TRYAGAIN text.
 * NOTE(review): TAPE_ERROR reads result_argv[2] with no visible check
 * of result_argc.
 * NOTE(review): the retry failure log prints level before datestamp,
 * while the dump-to-tape failure logs in main() print datestamp first.
 */
938 void handle_taper_result()
944 char *result_argv[MAX_ARGS+1];
946 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
950 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
951 if(result_argc != 5) {
952 error("error: [taper DONE result_argc != 5: %d", result_argc);
955 dp = serial2disk(result_argv[2]);
956 free_serial(result_argv[2]);
958 filenum = atoi(result_argv[4]);
959 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
961 delete_diskspace(dp);
963 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
964 walltime_str(curclock()), dp->host->hostname, dp->name);
967 amfree(sched(dp)->dumpdate);
968 amfree(sched(dp)->degr_dumpdate);
969 amfree(sched(dp)->datestamp);
975 continue_dumps(); /* continue with those dumps waiting for diskspace */
978 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
979 if (result_argc < 2) {
980 error("error [taper TRYAGAIN result_argc < 2: %d]", result_argc);
982 dp = serial2disk(result_argv[2]);
983 free_serial(result_argv[2]);
984 printf("driver: taper-tryagain time %s disk %s:%s\n",
985 walltime_str(curclock()), dp->host->hostname, dp->name);
988 /* re-insert into taper queue */
990 if(sched(dp)->attempted) {
991 log_add(L_FAIL, "%s %s %d %s [too many taper retries]",
992 dp->host->hostname, dp->name, sched(dp)->level,
993 sched(dp)->datestamp);
994 printf("driver: taper failed %s %s %s, too many taper retry\n", result_argv[2], dp->host->hostname, dp->name);
997 sched(dp)->attempted++;
998 headqueue_disk(&tapeq, dp);
1001 tape_left = tape_length;
1003 /* run next thing from queue */
1007 continue_dumps(); /* continue with those dumps waiting for diskspace */
1011 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1012 dp = serial2disk(result_argv[2]);
1013 free_serial(result_argv[2]);
1014 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1015 walltime_str(curclock()), dp->host->hostname, dp->name);
1017 /* Note: fall through code... */
1021 * Since we've gotten a tape error, we can't send anything more
1022 * to the taper. Go into degraded mode to try to get everthing
1023 * onto disk. Later, these dumps can be flushed to a new tape.
1024 * The tape queue is zapped so that it appears empty in future
1025 * checks. If there are dumps waiting for diskspace to be freed,
1030 "going into degraded mode because of tape error.");
1032 start_degraded_mode(&runq);
1035 tapeq.head = tapeq.tail = NULL;
1036 FD_CLR(taper,&readset);
1037 if(cmd != TAPE_ERROR) aclose(taper);
1041 error("driver received unexpected token (%d) from taper", cmd);
/*
 * idle_dumper -- return the first of the inparallel entries in dmptable
 * that is neither busy nor down.
 * The result when no such dumper exists is not visible in this view.
 */
1046 dumper_t *idle_dumper()
1050 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1051 if(!dumper->busy && !dumper->down) return dumper;
/*
 * some_dumps_in_progress -- return 1 as soon as any of the inparallel
 * dumpers in dmptable is busy.
 * The result when none is busy is not visible in this view.
 */
1056 int some_dumps_in_progress()
1060 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1061 if(dumper->busy) return 1;
/*
 * num_busy_dumpers -- count the busy dumpers among the inparallel
 * entries of dmptable by incrementing n once per busy entry.
 * The declaration of n and the return statement are not visible in
 * this view.
 */
1066 int num_busy_dumpers()
1072 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1073 if(dumper->busy) n += 1;
/*
 * lookup_dumper -- return the dumper in dmptable whose outfd equals fd.
 * The result when no dumper matches is not visible in this view.
 */
1078 dumper_t *lookup_dumper(fd)
1083 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1084 if(dumper->outfd == fd) return dumper;
/*
 * handle_dumper_result -- read one reply from the dumper that owns
 * descriptor fd and update the driver state accordingly.
 *
 * The dumper is found with lookup_dumper(); h and activehd are taken
 * from the schedule record of its current disk when one is present.
 *
 * Visible handling, by reply:
 *   DONE            requires exactly six fields.  Sizes and time are
 *                   recorded with update_info_dumper(),
 *                   rename_tmp_holding() is called with 1, the space
 *                   used on the active holding disk is corrected from
 *                   the real file size, bandwidth is released and the
 *                   disk is queued on tapeq.
 *   TRYAGAIN,       rename_tmp_holding() is called with 0, bandwidth
 *   FATAL_TRYAGAIN  and holding space are released.  A disk already
 *                   attempted is logged as failed; the retry path
 *                   counts the attempt and puts the disk back on runq.
 *                   FATAL_TRYAGAIN also removes fd from readset and
 *                   restarts the dumper.
 *   FAILED          same cleanup; nothing is logged here.
 *   NO_ROOM         the amount in result_argv[3] is subtracted from the
 *                   used, reserved, allocated_space and disksize
 *                   figures of the active holding disk.
 *   RQ_MORE_DISK    the active chunk is marked fully used.  If another
 *                   assigned chunk exists the dumper is told to
 *                   CONTINUE; otherwise the estimate is raised by
 *                   5 percent and find_diskspace() is asked for the
 *                   difference.  Without space the disk is parked on
 *                   roomq, with space it is assigned and CONTINUE is sent.
 *   ABORT_FINISHED  cleanup as above, the attempt is counted and the
 *                   disk goes back on runq.
 *   other           treated as EOF or garbage: fd leaves readset, the
 *                   dumper is marked down, and its disk is cleaned up
 *                   and either failed or retried.
 *
 * NOTE(review): NO_ROOM converts result_argv[3] with atoi() four times
 * and no check of result_argc is visible for that reply.
 * NOTE(review): the failure logs here print level before datestamp,
 * while the dump-to-tape failure logs in main() print datestamp first.
 */
1090 void handle_dumper_result(fd)
1093 assignedhd_t **h=NULL;
1101 char *result_argv[MAX_ARGS+1];
1105 dumper = lookup_dumper(fd);
1107 assert(dp && sched(dp) && sched(dp)->destname);
1109 if(dp && sched(dp) && sched(dp)->holdp) {
1110 h = sched(dp)->holdp;
1111 activehd = sched(dp)->activehd;
1114 cmd = getresult(fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1117 sdp = serial2disk(result_argv[2]); /* result_argv[2] always contains the serial number */
1123 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <err str> */
1124 if(result_argc != 6) {
1125 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1128 free_serial(result_argv[2]);
1130 origsize = (long)atof(result_argv[3]);
1131 dumpsize = (long)atof(result_argv[4]);
1132 dumptime = (long)atof(result_argv[5]);
1133 update_info_dumper(dp, origsize, dumpsize, dumptime);
1135 /* adjust holdp[active]->used using the real dumpsize and all other
1136 * holdp[i]->used as an estimate.
1140 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1141 dummy += h[i]->used;
1144 rename_tmp_holding(sched(dp)->destname, 1);
1145 assert( h && activehd >= 0 );
1146 h[activehd]->used = size_holding_files(sched(dp)->destname) - dummy;
1147 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1148 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1149 adjust_diskspace(dp, DONE);
1151 dp->host->inprogress -= 1;
1153 sched(dp)->attempted = 0;
1154 printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1155 walltime_str(curclock()), dumper->name,
1156 dp->host->hostname, dp->name);
1159 enqueue_disk(&tapeq, dp);
1167 case TRYAGAIN: /* TRY-AGAIN <handle> <err str> */
1168 case FATAL_TRYAGAIN:
1169 free_serial(result_argv[2]);
1171 rename_tmp_holding(sched(dp)->destname, 0);
1172 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1173 assert( h && activehd >= 0 );
1174 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1175 /* Because we don't know how much was written to disk the
1176 * following functions *must* be called together!
1178 adjust_diskspace(dp, DONE);
1179 delete_diskspace(dp);
1181 dp->host->inprogress -= 1;
1184 if(sched(dp)->attempted) {
1185 log_add(L_FAIL, "%s %s %d %s [too many dumper retry]",
1186 dp->host->hostname, dp->name,
1187 sched(dp)->level, sched(dp)->datestamp);
1188 printf("driver: dump failed %s %s %s, too many dumper retry\n", result_argv[2], dp->host->hostname, dp->name);
1190 sched(dp)->attempted++;
1191 enqueue_disk(&runq, dp);
1195 if(cmd == FATAL_TRYAGAIN) {
1196 /* dumper is confused, start another */
1197 log_add(L_WARNING, "%s (pid %ld) confused, restarting it.",
1198 dumper->name, (long)dumper->pid);
1199 FD_CLR(fd,&readset);
1201 startup_dump_process(dumper, dumper_program);
1203 /* sleep in case the dumper failed because of a temporary network
1204 problem, as NIS or NFS... */
1208 case FAILED: /* FAILED <handle> <errstr> */
1209 free_serial(result_argv[2]);
1211 rename_tmp_holding(sched(dp)->destname, 0);
1212 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1213 assert( h && activehd >= 0 );
1214 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1215 /* Because we don't know how much was written to disk the
1216 * following functions *must* be called together!
1218 adjust_diskspace(dp, DONE);
1219 delete_diskspace(dp);
1221 dp->host->inprogress -= 1;
1225 /* no need to log this, dumper will do it */
1226 /* sleep in case the dumper failed because of a temporary network
1227 problem, as NIS or NFS... */
1231 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1232 assert( h && activehd >= 0 );
1233 h[activehd]->used -= atoi(result_argv[3]);
1234 h[activehd]->reserved -= atoi(result_argv[3]);
1235 holdalloc(h[activehd]->disk)->allocated_space -= atoi(result_argv[3]);
1236 h[activehd]->disk->disksize -= atoi(result_argv[3]);
1239 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1240 assert( h && activehd >= 0 );
1241 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1242 h[activehd]->used = h[activehd]->reserved;
1243 if( h[++activehd] ) { /* There's still some allocated space left. Tell
1244 * the dumper about it. */
1245 sched(dp)->activehd++;
1246 dumper_cmd( dumper, CONTINUE, dp );
1247 } else { /* !h[++activehd] - must allocate more space */
1248 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1249 sched(dp)->est_size = sched(dp)->act_size * 21 / 20; /* +5% */
1250 sched(dp)->est_size = am_round(sched(dp)->est_size, DISK_BLOCK_KB);
1251 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1255 /* cur_idle = max(cur_idle, IDLE_NO_DISKSPACE); */
1256 /* No diskspace available. The reason for this will be
1257 * determined in continue_dumps(). */
1258 enqueue_disk( &roomq, dp );
1261 /* OK, allocate space for disk and have dumper continue */
1262 sched(dp)->activehd = assign_holdingdisk( h, dp );
1263 dumper_cmd( dumper, CONTINUE, dp );
1269 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1270 assert(pending_aborts);
1271 free_serial(result_argv[2]);
1273 rename_tmp_holding(sched(dp)->destname, 0);
1274 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1275 /* Because we don't know how much was written to disk the
1276 * following functions *must* be called together!
1278 adjust_diskspace(dp, DONE);
1279 delete_diskspace(dp);
1280 sched(dp)->attempted++;
1281 enqueue_disk(&runq, dp); /* we'll try again later */
1283 dp->host->inprogress -= 1;
1291 /* either EOF or garbage from dumper. Turn it off */
1292 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1293 dumper->name, (long)dumper->pid);
1294 FD_CLR(fd,&readset);
1297 dumper->down = 1; /* mark it down so it isn't used again */
1299 /* if it was dumping something, zap it and try again */
1300 rename_tmp_holding(sched(dp)->destname, 0);
1301 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1302 assert( h && activehd >= 0 );
1303 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1304 /* Because we don't know how much was written to disk the
1305 * following functions *must* be called together!
1307 adjust_diskspace(dp, DONE);
1308 delete_diskspace(dp);
1309 dp->host->inprogress -= 1;
1311 if(sched(dp)->attempted) {
1312 log_add(L_FAIL, "%s %s %d %s [%s died]",
1313 dp->host->hostname, dp->name,
1314 sched(dp)->level, sched(dp)->datestamp, dumper->name);
1317 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1318 dumper->name, dp->host->hostname, dp->name,
1320 sched(dp)->attempted++;
1321 enqueue_disk(&runq, dp);
1336 void read_flush(tapeqp)
1343 char *hostname, *diskname, *datestamp;
1347 char *inpline = NULL;
1351 long flush_size = 0;
1353 /* read schedule from stdin */
/* Each input line is scanned in this field order:
 *   FLUSH <hostname> <diskname> <datestamp> <level> <filename>
 * ENDFLUSH is the only other command that is recognized.  Every file
 * that passes the checks below is queued on tapeqp and its size is
 * added to flush_size.  inpline is released by the loop increment. */
1355 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1361 skip_whitespace(s, ch); /* find the command */
1363 error("Aflush line %d: syntax error", line);
1367 skip_non_whitespace(s, ch);
1370 if(strcmp(command,"ENDFLUSH") == 0) {
1374 if(strcmp(command,"FLUSH") != 0) {
1375 error("Bflush line %d: syntax error", line);
1379 skip_whitespace(s, ch); /* find the hostname */
1381 error("Cflush line %d: syntax error", line);
1385 skip_non_whitespace(s, ch);
1388 skip_whitespace(s, ch); /* find the diskname */
1390 error("Cflush line %d: syntax error", line);
1394 skip_non_whitespace(s, ch);
1397 skip_whitespace(s, ch); /* find the datestamp */
1399 error("Cflush line %d: syntax error", line);
1403 skip_non_whitespace(s, ch);
1406 skip_whitespace(s, ch); /* find the level number */
1407 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1408 error("Cflush line %d: syntax error", line);
1411 skip_integer(s, ch);
1413 skip_whitespace(s, ch); /* find the filename */
1415 error("Cflush line %d: syntax error", line);
1419 skip_non_whitespace(s, ch);
/* get_dumpfile() fills file from the holding file named on the line.
 * Only F_DUMPFILE entries are wanted; other types are logged as cruft
 * unless they are continuation chunks (F_CONT_DUMPFILE). */
1422 get_dumpfile(destname, &file);
1423 if( file.type != F_DUMPFILE) {
1424 if( file.type != F_CONT_DUMPFILE )
1425 log_add(L_INFO, "%s: ignoring cruft file.", destname);
/* The FLUSH line has to agree with what the file itself records. */
1429 if(strcmp(hostname, file.name) != 0 ||
1430 strcmp(diskname, file.disk) != 0 ||
1431 strcmp(datestamp, file.datestamp) != 0) {
1432 log_add(L_INFO, "disk %s:%s not consistent with file %s",
1433 hostname, diskname, destname);
1437 dp = lookup_disk(file.name, file.disk);
1440 log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1441 destname, file.name, file.disk);
/* Dump levels outside 0..9 are rejected. */
1445 if(file.dumplevel < 0 || file.dumplevel > 9) {
1446 log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1447 destname, file.dumplevel);
/* The queue entry is a freshly allocated disk_t (dp1), not the dp
 * returned by lookup_disk(). */
1451 dp1 = (disk_t *)alloc(sizeof(disk_t));
1453 dp1->next = dp1->prev = NULL;
1455 /* add it to the flushhost list */
1457 flushhost = alloc(sizeof(am_host_t));
1458 flushhost->next = NULL;
1459 flushhost->hostname = stralloc("FLUSHHOST");
1460 flushhost->up = NULL;
1461 flushhost->features = NULL;
1463 dp1->hostnext = flushhost->disks;
1464 flushhost->disks = dp1;
/* Schedule record for the flush: level and datestamp come from the
 * file header, and there is no degraded-mode alternative (-1). */
1466 sp = (sched_t *) alloc(sizeof(sched_t));
1467 sp->destname = stralloc(destname);
1468 sp->level = file.dumplevel;
1469 sp->dumpdate = NULL;
1470 sp->degr_dumpdate = NULL;
1471 sp->datestamp = stralloc(file.datestamp);
1475 sp->degr_level = -1;
1478 sp->act_size = size_holding_files(destname);
1479 /*sp->holdp = NULL; JLM: must be build*/
1480 sp->holdp = build_diskspace(destname);
/* NOTE(review): on this early exit dp1 is already linked into
 * flushhost->disks (with dp1->up not yet set) and sp with its
 * stralloc-ed strings is not released here.  Looks like a leak and a
 * half-initialised list entry; confirm against the full file. */
1481 if(sp->holdp == NULL) continue;
1483 sp->timestamp = (time_t)0;
1485 dp1->up = (char *)sp;
1487 enqueue_disk(tapeqp, dp1);
1488 flush_size += sp->act_size;
1490 printf("driver: flush size %ld\n", flush_size);
/* read_schedule: parse the planner schedule from stdin.  Every line is a
 * DUMP command whose fields are scanned in this order: host name,
 * feature list, disk name, datestamp, priority, level, dump date, size,
 * estimated time and, optionally, the four degraded-mode fields (level,
 * dump date, size, time).  A sched_t is filled in for each disk found by
 * lookup_disk(); the disk is removed from waitqp and inserted, ordered
 * by sort_by_time, into the run queue.
 * NOTE(review): the insert below targets the global runq, not the runqp
 * parameter; confirm that callers always pass &runq. */
1495 void read_schedule(waitqp, runqp)
1496 disklist_t *waitqp, *runqp;
1500 int level, line, priority;
1501 char *dumpdate, *degr_dumpdate;
1503 long time, degr_time;
1504 unsigned long size, degr_size;
1505 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1510 /* read schedule from stdin */
1512 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1518 skip_whitespace(s, ch); /* find the command */
1520 error("schedule line %d: syntax error (no command)", line);
1524 skip_non_whitespace(s, ch);
1527 if(strcmp(command,"DUMP") != 0) {
1528 error("schedule line %d: syntax error (%s != DUMP)", line, command);
1532 skip_whitespace(s, ch); /* find the host name */
1534 error("schedule line %d: syntax error (no host name)", line);
1538 skip_non_whitespace(s, ch);
1541 skip_whitespace(s, ch); /* find the feature list */
1543 error("schedule line %d: syntax error (no feature list)", line);
1547 skip_non_whitespace(s, ch);
1550 skip_whitespace(s, ch); /* find the disk name */
1552 error("schedule line %d: syntax error (no disk name)", line);
1556 skip_non_whitespace(s, ch);
1559 skip_whitespace(s, ch); /* find the datestamp */
1561 error("schedule line %d: syntax error (no datestamp)", line);
1565 skip_non_whitespace(s, ch);
1568 skip_whitespace(s, ch); /* find the priority number */
1569 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
1570 error("schedule line %d: syntax error (bad priority)", line);
1573 skip_integer(s, ch);
1575 skip_whitespace(s, ch); /* find the level number */
1576 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1577 error("schedule line %d: syntax error (bad level)", line);
1580 skip_integer(s, ch);
1582 skip_whitespace(s, ch); /* find the dump date */
1584 error("schedule line %d: syntax error (bad dump date)", line);
1588 skip_non_whitespace(s, ch);
1591 skip_whitespace(s, ch); /* find the size number */
1592 if(ch == '\0' || sscanf(s - 1, "%lu", &size) != 1) {
1593 error("schedule line %d: syntax error (bad size)", line);
1596 skip_integer(s, ch);
1598 skip_whitespace(s, ch); /* find the time number */
1599 if(ch == '\0' || sscanf(s - 1, "%ld", &time) != 1) {
1600 error("schedule line %d: syntax error (bad estimated time)", line);
1603 skip_integer(s, ch);
1605 degr_dumpdate = NULL; /* flag if degr fields found */
1606 skip_whitespace(s, ch); /* find the degr level number */
1608 if(sscanf(s - 1, "%d", &degr_level) != 1) {
1609 error("schedule line %d: syntax error (bad degr level)", line);
1612 skip_integer(s, ch);
1614 skip_whitespace(s, ch); /* find the degr dump date */
1616 error("schedule line %d: syntax error (bad degr dump date)", line);
1619 degr_dumpdate = s - 1;
1620 skip_non_whitespace(s, ch);
1623 skip_whitespace(s, ch); /* find the degr size number */
1624 if(ch == '\0' || sscanf(s - 1, "%lu", &degr_size) != 1) {
1625 error("schedule line %d: syntax error (bad degr size)", line);
1628 skip_integer(s, ch);
1630 skip_whitespace(s, ch); /* find the degr time number */
/* degr_time is a (signed) long, like time above, so it is scanned
 * with %ld rather than %lu. */
1631 if(ch == '\0' || sscanf(s - 1, "%ld", &degr_time) != 1) {
1632 error("schedule line %d: syntax error (bad degr estimated time)", line);
1635 skip_integer(s, ch);
1638 dp = lookup_disk(hostname, diskname);
1641 "schedule line %d: %s:%s not in disklist, ignored",
1642 line, hostname, diskname);
1646 sp = (sched_t *) alloc(sizeof(sched_t));
1648 sp->dumpdate = stralloc(dumpdate);
1649 sp->est_size = DISK_BLOCK_KB + size; /* include header */
1650 sp->est_time = time;
1651 sp->priority = priority;
1652 sp->datestamp = stralloc(datestamp);
/* Degraded-mode fields: copied when present, otherwise degr_level is
 * set to -1 and degr_dumpdate to NULL. */
1655 sp->degr_level = degr_level;
1656 sp->degr_dumpdate = stralloc(degr_dumpdate);
1657 sp->degr_size = DISK_BLOCK_KB + degr_size;
1658 sp->degr_time = degr_time;
1660 sp->degr_level = -1;
1661 sp->degr_dumpdate = NULL;
1667 sp->est_kps = size/time;
1669 if(sp->degr_level != -1) {
1673 sp->degr_kps = degr_size/degr_time;
1681 sp->timestamp = (time_t)0;
1682 sp->destname = NULL;
1685 dp->up = (char *) sp;
/* Host features are taken from the first schedule line seen for the host. */
1686 if(dp->host->features == NULL) {
1687 dp->host->features = am_string_to_feature(features);
1689 remove_disk(waitqp, dp);
1690 insert_disk(&runq, dp, sort_by_time);
1694 log_add(L_WARNING, "WARNING: got empty schedule from planner");
/* Body of the free-bandwidth computation.  The function header is not
 * visible here; callers elsewhere use free_kps(ip), so this is presumably
 * that function (TODO confirm).  With a null ip, maxusage and curusage
 * are summed over every interface returned by lookup_interface(NULL);
 * otherwise only the given interface is used.  In both cases res is
 * maxusage minus curusage. */
1702 if (ip == (interface_t *)0) {
1706 for(p = lookup_interface(NULL); p != NULL; p = p->next) {
1707 maxusage += p->maxusage;
1708 curusage += p->curusage;
1710 res = maxusage - curusage;
1713 res = ip->maxusage - ip->curusage;
/* interface_state: print one "driver: interface-state" status entry to
 * stdout, tagged with time_str, listing the name and free_kps() value of
 * every interface in the lookup_interface(NULL) list. */
1719 void interface_state(time_str)
1724 printf("driver: interface-state time %s", time_str);
1726 for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
1727 printf(" if %s: free %d", ip->name, free_kps(ip));
/* allocate_bandwidth: charge kps against interface ip by adding it to
 * ip->curusage.  No upper bound is checked in the visible code. */
1732 void allocate_bandwidth(ip, kps)
1736 ip->curusage += kps;
/* deallocate_bandwidth: give kps back to interface ip by subtracting it
 * from ip->curusage.  The assert guards against releasing more than is
 * currently accounted for. */
1739 void deallocate_bandwidth(ip, kps)
1743 assert(kps <= ip->curusage);
1744 ip->curusage -= kps;
/* free_space: walk every configured holding disk and compute, per disk,
 * disksize minus the space currently allocated on it.  How the per-disk
 * values are folded into total_free and returned is not visible here. */
1748 unsigned long free_space()
1751 unsigned long total_free;
1755 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
1756 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
/* find_diskspace: choose holding disks that together can take size KB
 * (size is first rounded with am_round to DISK_BLOCK_KB).  pref, when
 * non-NULL, is matched against each holding disk before the general
 * selection rule; it looks like a preferred disk (TODO confirm).  The
 * shape of the returned array is described in the comment below.  This
 * function only builds the proposal; it does not change allocated_space. */
1763 assignedhd_t **find_diskspace(size, cur_idle, pref)
1767 /* Rewrite by Peter Conrad <conrad@opus5.de>, June '99:
1768 * - enable splitting a dump across several holding disks
1769 * - allocate only as much as size tells us, dumpers may request more later
1770 * We return an array of pointers to assignedhd_t. The array contains at
1771 * most one entry per holding disk. The list of pointers is terminated by
1772 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
1773 * how much diskspace to use on that disk. Later on, assign_holdingdisk
1774 * will allocate the given amount of space.
1775 * If there is not enough room on the holdingdisks, NULL is returned.
1778 assignedhd_t **result = NULL;
1779 holdingdisk_t *minp, *hdp;
1780 int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
1783 long halloc, dalloc, hfree, dfree;
1785 size = am_round(size, DISK_BLOCK_KB);
1788 printf("find diskspace: want %lu K\n", size );
1792 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
/* used[] has one flag per holding disk; result[] has room for one entry
 * per holding disk plus the terminating NULL. */
1796 used = alloc(sizeof(char) * num_holdingdisks);/*disks used during this run*/
1797 memset( used, 0, num_holdingdisks );
1798 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
/* Each pass picks one more holding disk until size is covered or every
 * disk has been considered. */
1801 while( i < num_holdingdisks && size > 0 ) {
1802 /* find the holdingdisk with the fewest active dumpers and among
1803 * those the one with the biggest free space
1805 minp = NULL; minj = -1;
1806 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
1807 if( pref && pref->disk == hdp && !used[j] &&
1808 holdalloc(hdp)->allocated_space <= hdp->disksize - DISK_BLOCK_KB) {
1813 else if( holdalloc(hdp)->allocated_space <= hdp->disksize - 2*DISK_BLOCK_KB &&
1816 holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
1817 (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
1818 hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
1824 if( !minp ) { break; } /* all holding disks are full */
1827 /* hfree = free space on the disk */
1828 hfree = minp->disksize - holdalloc(minp)->allocated_space;
1830 /* dfree = free space for data, remove 1 header for each chunksize */
1831 dfree = hfree - (((hfree-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
1833 /* dalloc = space I can allocate for data */
1834 dalloc = ( dfree < size ) ? dfree : size;
1836 /* halloc = space to allocate, including 1 header for each chunksize */
1837 halloc = dalloc + (((dalloc-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
/* NOTE(review): size is printed with %lu elsewhere in this function but
 * with %ld here; the declared type of size is not visible, so confirm
 * which conversion is correct. */
1840 fprintf(stdout,"find diskspace: size %ld hf %ld df %ld da %ld ha %ld\n", size, hfree, dfree, dalloc, halloc);
/* Record the proposal for this disk: halloc KB reserved, nothing used,
 * and no file name yet. */
1844 result[i] = alloc(sizeof(assignedhd_t));
1845 result[i]->disk = minp;
1846 result[i]->reserved = halloc;
1847 result[i]->used = 0;
1848 result[i]->destname = NULL;
1851 } /* while i < num_holdingdisks && size > 0 */
1854 if( size ) { /* not enough space available */
1856 printf("find diskspace: not enough diskspace. Left with %lu K\n", size);
1859 free_assignedhd(result);
/* Trace output for every selected holding disk. */
1864 for( i = 0; result && result[i]; i++ ) {
1865 printf("find diskspace: selected %s free %ld reserved %ld dumpers %d\n",
1866 result[i]->disk->diskdir,
1867 result[i]->disk->disksize - holdalloc(result[i]->disk)->allocated_space,
1868 result[i]->reserved,
1869 holdalloc(result[i]->disk)->allocated_dumpers);
/* assign_holdingdisk: take the NULL-terminated proposal array holdp and
 * commit it to diskp.  The entries are appended to sched(diskp)->holdp,
 * each holding disk's allocated_space grows by the reserved amount, and
 * sched(diskp)->destname is set from the first entry.  The return value
 * is not visible in this view. */
1877 int assign_holdingdisk(holdp, diskp)
1878 assignedhd_t **holdp;
1881 /* Modified by Peter Conrad <conrad@opus5.de>, June '99
1882 * Modifications for splitting dumps across holding disks:
1883 * sched(diskp)->holdp now contains an array of pointers to assignedhd_t.
1887 char *sfn = sanitise_filename(diskp->name);
1889 assignedhd_t **new_holdp;
1891 ap_snprintf( lvl, sizeof(lvl), "%d", sched(diskp)->level );
1893 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
1896 for( c = 0; holdp[c]; c++ ); /* count number of disks */
1898 /* allocate memory for sched(diskp)->holdp */
/* j counts the entries already assigned; the new array holds the old
 * j entries, the c new ones and a terminating NULL. */
1899 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++) {}
1900 new_holdp = (assignedhd_t **)alloc(sizeof(assignedhd_t*)*(j+c+1));
1901 if (sched(diskp)->holdp) {
1902 memcpy(new_holdp, sched(diskp)->holdp, j * sizeof(*new_holdp));
1903 amfree(sched(diskp)->holdp);
1905 sched(diskp)->holdp = new_holdp;
1909 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
1910 * merge assignedhd_t's */
/* When the last existing entry and the first new one are on the same
 * holding disk, the reservation is added to the existing entry. */
1912 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
1913 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
1914 holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
1915 size = (holdp[0]->reserved>size) ? 0 : size-holdp[0]->reserved;
1917 printf("merging holding disk %s to disk %s:%s, add %lu for reserved %lu, left %lu\n",
1918 sched(diskp)->holdp[j-1]->disk->diskdir,
1919 diskp->host->hostname, diskp->name,
1920 holdp[0]->reserved, sched(diskp)->holdp[j-1]->reserved,
1930 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
1931 for( ; holdp[i]; i++ ) {
/* The holding file name starts with the disk directory and includes the
 * host name; the remaining name parts are on lines not shown here. */
1932 holdp[i]->destname = newvstralloc( holdp[i]->destname,
1933 holdp[i]->disk->diskdir, "/",
1935 diskp->host->hostname, ".",
1938 sched(diskp)->holdp[j++] = holdp[i];
1939 holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
1940 size = (holdp[i]->reserved>size) ? 0 : size-holdp[i]->reserved;
1942 printf("assigning holding disk %s to disk %s:%s, reserved %lu, left %lu\n",
1943 holdp[i]->disk->diskdir, diskp->host->hostname, diskp->name,
1944 holdp[i]->reserved, size );
1947 holdp[i] = NULL; /* so it doesn't get free()d... */
1949 sched(diskp)->holdp[j] = NULL;
1950 sched(diskp)->destname = newstralloc(sched(diskp)->destname,sched(diskp)->holdp[0]->destname);
/* adjust_diskspace: reconcile the reservation of diskp with what has
 * actually been used.  For every assigned holding disk, reserved is
 * brought to used and the disk's allocated_space moves by the same
 * difference; sched(diskp)->act_size becomes the sum of the used values.
 * How cmd influences this is not visible in this view. */
1956 static void adjust_diskspace(diskp, cmd)
1960 /* Re-write by Peter Conrad <conrad@opus5.de>, March '99
1961 * Modifications for splitting dumps across holding disks:
1962 * Dumpers no longer write more than they've allocated, therefore an
1963 * adjustment may only free some allocated space.
1964 * 08/99: Jean-Louis suggested that dumpers tell us how much they've written.
1965 * We just believe them and don't stat all the files but rely on the used
1969 assignedhd_t **holdp;
1970 unsigned long total=0;
1975 printf("adjust: %s:%s %s\n", diskp->host->hostname, diskp->name,
1976 sched(diskp)->destname );
1980 holdp = sched(diskp)->holdp;
1984 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
/* diff is used minus reserved.  Adding it to reserved below leaves
 * reserved equal to used. */
1985 diff = holdp[i]->used - holdp[i]->reserved;
1986 total += holdp[i]->used;
1987 holdalloc(holdp[i]->disk)->allocated_space += diff;
1989 printf("adjust: hdisk %s done, reserved %ld used %ld diff %ld alloc %ld dumpers %d\n",
1990 holdp[i]->disk->name, holdp[i]->reserved, holdp[i]->used, diff,
1991 holdalloc(holdp[i]->disk)->allocated_space,
1992 holdalloc(holdp[i]->disk)->allocated_dumpers );
1995 holdp[i]->reserved += diff;
1998 sched(diskp)->act_size = total;
2000 printf("adjust: after: disk %s:%s used %ld\n", diskp->host->hostname,
2001 diskp->name, sched(diskp)->act_size );
/* delete_diskspace: drop everything diskp holds on the holding disks.
 * The used amount of each assigned entry is subtracted from its disk's
 * allocated_space, the holding files are unlinked starting from the
 * first entry's destname, and the holdp array, act_size and destname in
 * sched(diskp) are reset.
 * Note that the subtraction uses used, not reserved.  The call sites
 * visible in this file run adjust_diskspace(dp, DONE) immediately before,
 * and that function leaves reserved equal to used. */
2006 static void delete_diskspace(diskp)
2009 /* Re-write by Peter Conrad <conrad@opus5.de>, March '99
2010 * Modifications for splitting dumps across holding disks:
2011 * After implementing Jean-Louis' suggestion (see above) this looks much
2012 * simpler... again, we rely on assignedhd_s containing correct info
2014 assignedhd_t **holdp;
2017 holdp = sched(diskp)->holdp;
2021 for( i = 0; holdp[i]; i++ ) { /* for each disk */
2022 /* find all files of this dump on that disk, and subtract their
2023 * reserved sizes from the disk's allocated space
2025 holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
2028 unlink_holding_files(holdp[0]->destname); /* no need for the entire list,
2029 because unlink_holding_files
2030 will walk through all files
2031 using cont_filename */
/* Forget the assignment so the disk can be scheduled from scratch. */
2033 free_assignedhd(sched(diskp)->holdp);
2034 sched(diskp)->holdp = NULL;
2035 sched(diskp)->act_size = 0;
2036 amfree(sched(diskp)->destname);
/* build_diskspace: reconstruct the holding-disk assignment of an
 * existing dump.  Starting at destname, the chain of files is followed
 * through the cont_filename field of each file header.  Every file is
 * matched to a configured holding disk by directory, and its size in KB
 * (rounded up) is accumulated per holding disk.  The result array gets
 * entries with reserved and used both set to the accumulated size and
 * destname set to a copy of destname.  Which disks get an entry, and
 * what is returned on the error paths, is decided on lines not shown. */
2039 assignedhd_t **build_diskspace(destname)
2045 char buffer[DISK_BLOCK_BYTES];
2047 assignedhd_t **result;
2050 int num_holdingdisks=0;
2051 char dirname[1000], *ch;
2053 char *filename = destname;
2055 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
/* used[] accumulates KB per holding disk; result[] has room for one
 * entry per holding disk plus one more slot. */
2058 used = alloc(sizeof(int) * num_holdingdisks);
2059 for(i=0;i<num_holdingdisks;i++)
2061 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
2063 while(filename != NULL && filename[0] != '\0') {
/* NOTE(review): strncpy does not terminate dirname when filename has
 * 999 or more characters; a terminator is presumably stored on a line
 * not shown here.  Confirm. */
2064 strncpy(dirname, filename, 999);
/* Two strrchr calls on '/': presumably the last two path components are
 * cut off to obtain the holding disk directory (TODO confirm). */
2066 ch = strrchr(dirname,'/');
2068 ch = strrchr(dirname,'/');
2071 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2072 hdp = hdp->next, j++ ) {
2073 if(strcmp(dirname,hdp->diskdir)==0) {
2078 if(stat(filename, &finfo) == -1) {
2079 fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
/* File size in KB, rounded up. */
2082 used[j] += (finfo.st_size+1023)/1024;
2083 if((fd = open(filename,O_RDONLY)) == -1) {
2084 fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2085 filename, strerror(errno));
/* Read and parse the file header to find the next chunk of the dump.
 * NOTE(review): no close of fd is visible in this view; confirm that
 * the descriptor is released on every path. */
2088 buflen = fullread(fd, buffer, sizeof(buffer));
2089 parse_file_header(buffer, &file, buflen);
2091 filename = file.cont_filename;
2094 for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2095 hdp = hdp->next, j++ ) {
2097 result[i] = alloc(sizeof(assignedhd_t));
2098 result[i]->disk = hdp;
2099 result[i]->reserved = used[j];
2100 result[i]->used = used[j];
2101 result[i]->destname = stralloc(destname);
/* holdingdisk_state: print one "driver: hdisk-state" status entry to
 * stdout, tagged with time_str.  For every configured holding disk it
 * shows the index, the free space (disksize minus allocated_space) and
 * the number of dumpers currently allocated to it. */
2112 void holdingdisk_state(time_str)
2119 printf("driver: hdisk-state time %s", time_str);
2121 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2122 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2123 printf(" hdisk %d: free %ld dumpers %d", dsk, diff,
2124 holdalloc(hdp)->allocated_dumpers);
/* update_failed_dump_to_tape: record a failed direct-to-tape dump of dp.
 * The schedule timestamp is zeroed for the duration of the
 * update_info_dumper(dp, -1, -1, -1) call and restored afterwards; the
 * reason is given in the comment below. */
2129 static void update_failed_dump_to_tape(dp)
2132 time_t save_timestamp = sched(dp)->timestamp;
2133 /* setting timestamp to 0 removes the current level from the
2134 * database, so that we ensure that it will not be bumped to the
2135 * next level on the next run. If we didn't do this, dumpdates or
2136 * gnutar-lists might have been updated already, and a bumped
2137 * incremental might be created. */
2138 sched(dp)->timestamp = 0;
2139 update_info_dumper(dp, -1, -1, -1);
2140 sched(dp)->timestamp = save_timestamp;
2143 /* ------------------- */
/* dump_to_tape: dump dp straight to tape.  The taper is asked for a port
 * (PORT_WRITE), an idle dumper is told to dump to that port (PORT_DUMP),
 * and the function then waits first for the dumper result and afterwards
 * for the taper result.  The local variable failed carries the dumper
 * outcome into the taper handling: 1 leads to the tryagain label and 2
 * to failed_dumper.  The two early exits visible here return 2, marked
 * "fatal problem"; the other return values are on lines not shown.
 * inside_dump_to_tape is set for the duration of the call. */
2144 int dump_to_tape(dp)
2155 char *result_argv[MAX_ARGS+1];
2156 int dumper_tryagain = 0;
2158 inside_dump_to_tape = 1; /* for simulator */
2160 printf("driver: dumping %s:%s directly to tape\n",
2161 dp->host->hostname, dp->name);
2164 /* pick a dumper and fail if there are no idle dumpers */
2166 dumper = idle_dumper();
2168 printf("driver: no idle dumpers for %s:%s.\n",
2169 dp->host->hostname, dp->name);
2171 log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2172 dp->host->hostname, dp->name);
2173 inside_dump_to_tape = 0;
2174 return 2; /* fatal problem */
2177 /* tell the taper to read from a port number of its choice */
2179 taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2180 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2182 printf("driver: did not get PORT from taper for %s:%s\n",
2183 dp->host->hostname, dp->name);
2185 inside_dump_to_tape = 0;
2186 return 2; /* fatal problem */
2188 /* copy port number */
2189 sched(dp)->destname = newvstralloc(sched(dp)->destname, result_argv[2], NULL );
2191 /* tell the dumper to dump to a port */
2193 dumper_cmd(dumper, PORT_DUMP, dp);
2194 dp->host->start_t = time(NULL) + 15;
2196 /* update statistics & print state */
/* Taper and dumper are both marked busy; the bookkeeping done here is
 * undone in the "reset statistics" section at the end. */
2198 taper_busy = dumper->busy = 1;
2199 dp->host->inprogress += 1;
2201 sched(dp)->timestamp = time((time_t *)0);
2202 allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2207 /* wait for result from dumper */
2209 cmd = getresult(dumper->outfd, 1, &result_argc, result_argv, MAX_ARGS+1);
2212 free_serial(result_argv[2]);
2216 /* either eof or garbage from dumper */
2217 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2218 dumper->name, (long)dumper->pid);
2219 dumper->down = 1; /* mark it down so it isn't used again */
2220 failed = 1; /* dump failed, must still finish up with taper */
2223 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <err str> */
2224 /* everything went fine */
/* NOTE(review): atof reports no conversion errors, so malformed numbers
 * from the dumper are not detected here. */
2225 origsize = (long)atof(result_argv[3]);
2226 dumpsize = (long)atof(result_argv[4]);
2227 dumptime = (long)atof(result_argv[5]);
2230 case NO_ROOM: /* NO-ROOM <handle> */
/* The dumper is told to abort and its ABORT_FINISHED reply is awaited. */
2231 dumper_cmd(dumper, ABORT, dp);
2232 cmd = getresult(dumper->outfd, 1, &result_argc, result_argv, MAX_ARGS+1);
2234 free_serial(result_argv[2]);
2235 assert(cmd == ABORT_FINISHED);
2237 case TRYAGAIN: /* TRY-AGAIN <handle> <err str> */
2239 /* dump failed, but we must still finish up with taper */
2240 /* problem with dump, possibly nonfatal, retry one time */
2241 sched(dp)->attempted++;
2242 failed = sched(dp)->attempted;
2243 dumper_tryagain = 1;
2246 case FAILED: /* FAILED <handle> <errstr> */
2247 /* dump failed, but we must still finish up with taper */
2248 failed = 2; /* fatal problem with dump */
/* NOTE(review): the next five lines read as the body of a block comment
 * whose opening and closing delimiters are missing from this view. */
2253 * Note that at this point, even if the dump above failed, it may
2254 * not be a fatal failure if taper below says we can try again.
2255 * E.g. a dumper failure above may actually be the result of a
2256 * tape overflow, which in turn causes dump to see "broken pipe",
2257 * "no space on device", etc., since taper closed the port first.
2260 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2263 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
2264 if(result_argc != 5) {
2265 error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
2268 if(failed == 1) goto tryagain; /* dump didn't work */
2269 else if(failed == 2) goto failed_dumper;
2271 free_serial(result_argv[2]);
2273 /* every thing went fine */
2274 update_info_dumper(dp, origsize, dumpsize, dumptime);
2275 filenum = atoi(result_argv[4]);
2276 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2277 /* note that update_info_dumper() must be run before
2278 update_info_taper(), since update_info_dumper overwrites
2279 tape information. */
2283 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
/* The attempt is counted here only when the dumper has not already
 * counted it in its own TRY-AGAIN handling above. */
2284 if(dumper_tryagain == 0) {
2285 sched(dp)->attempted++;
2286 if(sched(dp)->attempted > failed)
2287 failed = sched(dp)->attempted;
/* The disk goes back to the head of the run queue, and tape_left is
 * reset to the full tape length. */
2291 headqueue_disk(&runq, dp);
2293 update_failed_dump_to_tape(dp);
2294 free_serial(result_argv[2]);
2295 tape_left = tape_length;
2299 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2302 update_failed_dump_to_tape(dp);
2303 free_serial(result_argv[2]);
2304 failed = 2; /* fatal problem */
2305 start_degraded_mode(&runq);
2308 /* reset statistics & return */
2310 taper_busy = dumper->busy = 0;
2311 dp->host->inprogress -= 1;
2313 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2315 inside_dump_to_tape = 0;
/* Counting loop with an empty body: len ends up as the number of nodes
 * reachable from q.head through next.  The enclosing function header is
 * not visible here; callers elsewhere use queue_length(q), so this is
 * presumably that function (TODO confirm). */
2325 for(len = 0, p = q.head; p != NULL; len++, p = p->next);
/* short_dump_state: print a one-line "driver: state" summary to stdout:
 * current wall time, free bandwidth over all interfaces, free holding
 * space, taper status, number of idle dumpers, lengths of the tape, run
 * and room queues, the wakeup time and the idle reason.  The same wall
 * time string is then passed to interface_state() and
 * holdingdisk_state(). */
2330 void short_dump_state()
2335 wall_time = walltime_str(curclock());
2337 printf("driver: state time %s ", wall_time);
2338 printf("free kps: %d space: %lu taper: ",
2339 free_kps((interface_t *)0), free_space());
/* Taper status: DOWN in degraded mode, otherwise idle or writing. */
2340 if(degraded_mode) printf("DOWN");
2341 else if(!taper_busy) printf("idle");
2342 else printf("writing");
/* Count the dumper table slots that are not busy. */
2344 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2345 printf(" idle-dumpers: %d", nidle);
2346 printf(" qlen tapeq: %d", queue_length(tapeq));
2347 printf(" runq: %d", queue_length(runq));
2348 printf(" roomq: %d", queue_length(roomq));
2349 printf(" wakeup: %d", (int)sleep_time.tv_sec);
2350 printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2351 interface_state(wall_time);
2352 holdingdisk_state(wall_time);
/* dump_state: print a multi-line driver state report to stdout, framed
 * by lines of equal signs and labelled with str.  It shows free
 * bandwidth and holding space, the taper status, one line per dumper
 * table slot (idle, or the disk being dumped with its estimates), and
 * then the TAPE, ROOM and RUN queues via dump_queue(). */
2356 void dump_state(str)
2362 printf("================\n");
2363 printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
2364 printf("free kps: %d, space: %lu\n", free_kps((interface_t *)0), free_space());
2365 if(degraded_mode) printf("taper: DOWN\n");
2366 else if(!taper_busy) printf("taper: idle\n");
2367 else printf("taper: writing %s:%s.%d est size %lu\n",
2368 taper_disk->host->hostname, taper_disk->name,
2369 sched(taper_disk)->level,
2370 sched(taper_disk)->est_size);
2371 for(i = 0; i < inparallel; i++) {
/* dp is read for every slot but is only dereferenced on the busy path. */
2372 dp = dmptable[i].dp;
2373 if(!dmptable[i].busy)
2374 printf("%s: idle\n", dmptable[i].name);
2376 printf("%s: dumping %s:%s.%d est kps %d size %lu time %ld\n",
2377 dmptable[i].name, dp->host->hostname, dp->name, sched(dp)->level,
2378 sched(dp)->est_kps, sched(dp)->est_size, sched(dp)->est_time);
2380 dump_queue("TAPE", tapeq, 5, stdout);
2381 dump_queue("ROOM", roomq, 5, stdout);
2382 dump_queue("RUN ", runq, 5, stdout);
2383 printf("================\n");