2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: driver.c,v 1.165.2.2 2006/04/23 23:04:33 martinea Exp $
29 * controlling process for the Amanda backup system
33 * XXX possibly modify tape queue to be cognizant of how much room is left on
34 * tape. Probably not effective though, should do this in planner.
37 /*#define HOLD_DEBUG*/
50 #include "server_util.h"
/*
 * File-scope state of the driver.
 * NOTE(review): this listing omits many original lines; the remarks below
 * rely only on the uses that are visible in it.
 */
52 static disklist_t waitq, runq, tapeq, roomq; /* runq: disks still to dump; tapeq: images queued for the taper; roomq: dumps waiting for holding-disk space */
53 static int pending_aborts;
54 static disk_t *taper_disk;
55 static int degraded_mode;
56 static unsigned long reserved_space; /* total_disksize scaled by the "reserve" percentage, kept for degraded-mode dumps */
57 static unsigned long total_disksize; /* sum of disksize over the holding disks added in main() */
58 static char *dumper_program;
59 static char *chunker_program;
60 static int inparallel; /* CNF_INPARALLEL, capped at MAX_DUMPERS in main() */
61 static int nodump = 0; /* when set, main() does not start the dumper processes */
62 static unsigned long tape_length, tape_left = 0; /* tape_length comes from the tapetype; tape_left is reset to it and reduced as images are handed to the taper */
63 static int current_tape = 1;
64 static int conf_taperalgo;
65 static int conf_runtapes;
66 static time_t sleep_time; /* earliest start_t among delayed disks; used as the timer deadline */
67 static int idle_reason; /* one of the IDLE_* codes */
68 static char *datestamp;
69 static char *timestamp;
70 static am_host_t *flushhost = NULL;
71 static int need_degraded=0;
73 static event_handle_t *dumpers_ev_time = NULL; /* timer event that fires handle_dumpers_time() */
74 static event_handle_t *schedule_ev_read = NULL; /* read event on fd 0 that fires read_schedule() */
76 static void allocate_bandwidth P((interface_t *ip, int kps));
77 static int assign_holdingdisk P((assignedhd_t **holdp, disk_t *diskp));
78 static void adjust_diskspace P((disk_t *diskp, cmd_t cmd));
79 static void delete_diskspace P((disk_t *diskp));
80 static assignedhd_t **build_diskspace P((char *destname));
81 static int client_constrained P((disk_t *dp));
82 static void deallocate_bandwidth P((interface_t *ip, int kps));
83 static void dump_schedule P((disklist_t *qp, char *str));
84 static int dump_to_tape P((disk_t *dp));
85 static assignedhd_t **find_diskspace P((unsigned long size, int *cur_idle,
86 assignedhd_t *preferred));
87 static int free_kps P((interface_t *ip));
88 static unsigned long free_space P((void));
89 static void dumper_result P((disk_t *dp));
90 static void handle_dumper_result P((void *));
91 static void handle_chunker_result P((void *));
92 static void handle_dumpers_time P((void *));
93 static void handle_taper_result P((void *));
94 static void holdingdisk_state P((char *time_str));
95 static dumper_t *idle_dumper P((void));
96 static void interface_state P((char *time_str));
97 static int num_busy_dumpers P((void));
98 static int queue_length P((disklist_t q));
99 static disklist_t read_flush P((void));
100 static void read_schedule P((void *cookie));
101 static void short_dump_state P((void));
102 static void startaflush P((void));
103 static void start_degraded_mode P((disklist_t *queuep));
104 static void start_some_dumps P((disklist_t *rq));
105 static void continue_port_dumps();
106 static void update_failed_dump_to_tape P((disk_t *));
108 static void dump_state P((const char *str));
110 int main P((int main_argc, char **main_argv));
112 static const char *idle_strings[] = {
115 #define IDLE_NO_DUMPERS 1
117 #define IDLE_START_WAIT 2
119 #define IDLE_NO_HOLD 3
121 #define IDLE_CLIENT_CONSTRAINED 4
122 "client-constrained",
123 #define IDLE_NO_DISKSPACE 5
125 #define IDLE_TOO_LARGE 6
127 #define IDLE_NO_BANDWIDTH 7
129 #define IDLE_TAPER_WAIT 8
/*
 * main -- entry point of the driver, the controlling process for the
 * Amanda backup system.  The statements shown here read the configuration,
 * start the taper and the dumpers, size the holding disks, push any
 * remaining dumps directly to tape, then tell the children to quit and
 * reap them.
 *
 * main_argv[1] is used as the configuration name and main_argv[2] is
 * compared against "nodump".
 *
 * NOTE(review): this listing omits many of the original lines (braces,
 * else branches, some declarations); the comments below describe only
 * what the visible statements do.
 */
134 main(main_argc, main_argv)
143 generic_fs_stats_t fs;
145 unsigned long malloc_hist_1, malloc_size_1;
146 unsigned long malloc_hist_2, malloc_size_2;
147 unsigned long reserve = 100;
152 char *result_argv[MAX_ARGS+1];
160 setvbuf(stdout, (char *)NULL, _IOLBF, 0); /* line-buffer stdout ... */
161 setvbuf(stderr, (char *)NULL, _IOLBF, 0); /* ... and stderr */
165 /* Don't die when child closes pipe */
166 signal(SIGPIPE, SIG_IGN);
168 malloc_size_1 = malloc_inuse(&malloc_hist_1); /* baseline for the leak report at the end */
170 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
171 set_logerror(logerror);
175 printf("%s: pid %ld executable %s version %s\n",
176 get_pname(), (long) getpid(), main_argv[0], version());
179 config_name = stralloc(main_argv[1]);
180 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
182 if(strncmp(main_argv[2], "nodump", 6) == 0) { /* NOTE(review): branch body not shown; presumably sets nodump -- confirm */
188 char my_cwd[STR_SIZE];
/* use the current directory as config_dir and its last path component as config_name */
190 if (getcwd(my_cwd, sizeof(my_cwd)) == NULL) {
191 error("cannot determine current working directory");
193 config_dir = stralloc2(my_cwd, "/");
194 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
195 config_name = stralloc(config_name + 1);
201 conffile = stralloc2(config_dir, CONFFILE_NAME);
202 if(read_conffile(conffile)) {
203 error("errors processing config file \"%s\"", conffile);
208 datestamp = construct_datestamp(NULL);
209 timestamp = construct_timestamp(NULL);
210 log_add(L_START,"date %s", datestamp);
212 taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
213 dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
215 chunker_program = vstralloc(libexecdir, "/", "chunker", versionsuffix(),
218 conf_taperalgo = getconf_int(CNF_TAPERALGO);
219 conf_tapetype = getconf_str(CNF_TAPETYPE);
220 conf_runtapes = getconf_int(CNF_RUNTAPES);
221 tape = lookup_tapetype(conf_tapetype);
222 tape_length = tape->length;
223 printf("driver: tape size %ld\n", tape_length);
225 /* taper takes a while to get going, so start it up right away */
228 if(conf_runtapes > 0) { /* the taper is only started when at least one tape may be used */
229 startup_tape_process(taper_program);
230 taper_cmd(START_TAPER, datestamp, NULL, 0, NULL);
233 /* start initializing: read in databases */
235 conf_diskfile = getconf_str(CNF_DISKFILE);
236 if (*conf_diskfile == '/') { /* an absolute disklist path is used as given */
237 conf_diskfile = stralloc(conf_diskfile);
239 conf_diskfile = stralloc2(config_dir, conf_diskfile);
241 if (read_diskfile(conf_diskfile, &origq) < 0)
242 error("could not load disklist \"%s\"", conf_diskfile);
243 amfree(conf_diskfile);
245 /* set up any configuration-dependent variables */
247 inparallel = getconf_int(CNF_INPARALLEL);
249 reserve = getconf_int(CNF_RESERVE);
/* walk the configured holding disks, attaching a fresh holdalloc_t to each */
252 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
253 hdp->up = (void *)alloc(sizeof(holdalloc_t));
254 holdalloc(hdp)->allocated_dumpers = 0;
255 holdalloc(hdp)->allocated_space = 0L;
/* a holding disk that cannot be stat'ed or written to is reported and ignored */
257 if(get_fs_stats(hdp->diskdir, &fs) == -1
258 || access(hdp->diskdir, W_OK) == -1) {
259 log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
260 hdp->diskdir, strerror(errno));
266 if(hdp->disksize > 0) {
267 if(hdp->disksize > fs.avail) { /* a request larger than the free space is cut down to fs.avail */
269 "WARNING: %s: %ld KB requested, but only %ld KB available.",
270 hdp->diskdir, hdp->disksize, fs.avail);
271 hdp->disksize = fs.avail;
/* NOTE(review): the test and the += below suggest that a non-positive disksize is an offset from the free space; enclosing branches are not shown -- confirm */
274 else if(fs.avail + hdp->disksize < 0) {
276 "WARNING: %s: not %ld KB free.",
277 hdp->diskdir, -hdp->disksize);
282 hdp->disksize += fs.avail;
285 printf("driver: adding holding disk %d dir %s size %ld chunksize %ld\n",
286 dsk, hdp->diskdir, hdp->disksize, hdp->chunksize);
288 newdir = newvstralloc(newdir,
289 hdp->diskdir, "/", timestamp,
291 if(!mkholdingdir(newdir)) {
294 total_disksize += hdp->disksize;
297 reserved_space = total_disksize * (reserve / 100.0); /* reserve is a percentage */
299 printf("reserving %ld out of %ld for degraded-mode dumps\n",
300 reserved_space, free_space());
304 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
306 /* fire up the dumpers now while we are waiting */
307 if(!nodump) startup_dump_processes(dumper_program, inparallel);
310 * Read schedule from stdin. Usually, this is a pipe from planner,
311 * so the effect is that we wait here for the planner to
312 * finish, but meanwhile the taper is rewinding the tape, reading
313 * the label, checking it, writing a new label and all that jazz
314 * in parallel with the planner.
320 tapeq = read_flush();
322 roomq.head = roomq.tail = NULL;
324 log_add(L_STATS, "startup time %s", walltime_str(curclock()));
326 printf("driver: start time %s inparallel %d bandwidth %d diskspace %lu",
327 walltime_str(curclock()), inparallel, free_kps((interface_t *)0),
329 printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
330 "OBSOLETE", datestamp, taperalgo2str(conf_taperalgo),
331 getconf_str(CNF_DUMPORDER));
334 /* ok, planner is done, now let's see if the tape is ready */
336 if(conf_runtapes > 0) {
337 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
339 if(cmd != TAPER_OK) {
340 /* no tape, go into degraded mode: dump to holding disk */
348 tape_left = tape_length;
351 taper_ev_read = NULL;
352 if(!need_degraded) startaflush();
355 schedule_ev_read = event_register(0, EV_READFD, read_schedule, NULL); /* fd 0: the schedule arrives on stdin */
360 /* handle any remaining dumps by dumping directly to tape, if possible */
362 while(!empty(runq) && taper > 0) {
363 diskp = dequeue_disk(&runq);
365 int rc = dump_to_tape(diskp);
368 "%s %s %d [dump to tape failed, will try again]",
369 diskp->host->hostname,
371 sched(diskp)->level);
373 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
374 diskp->host->hostname,
376 sched(diskp)->datestamp,
377 sched(diskp)->level);
380 log_add(L_FAIL, "%s %s %s %d [%s]",
381 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
384 "can't dump no-hold disk in degraded mode" :
385 "no more holding disk space");
388 short_dump_state(); /* for amstatus */
390 printf("driver: QUITTING time %s telling children to quit\n",
391 walltime_str(curclock()));
395 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
396 dumper_cmd(dumper, QUIT, NULL);
401 taper_cmd(QUIT, NULL, NULL, 0, NULL);
404 /* wait for all to die */
407 char number[NUM_STR_SIZE];
413 if((pid = wait(&retstat)) == -1) {
414 if(errno == EINTR) continue; /* interrupted wait: retry */
418 if(! WIFEXITED(retstat)) {
420 code = WTERMSIG(retstat); /* not a normal exit: report the signal number */
421 } else if(WEXITSTATUS(retstat) != 0) {
423 code = WEXITSTATUS(retstat); /* normal exit with a non-zero status */
/* work out which child this pid was: a dumper, the taper, or an unknown pid */
426 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
427 if(pid == dumper->pid) {
428 who = stralloc(dumper->name);
432 if(who == NULL && pid == taper_pid) {
433 who = stralloc("taper");
435 if(what != NULL && who == NULL) {
436 snprintf(number, sizeof(number), "%ld", (long)pid);
437 who = stralloc2("unknown pid ", number);
440 log_add(L_WARNING, "%s exited with %s %d\n", who, what, code);
441 printf("driver: %s exited with %s %d\n", who, what, code);
446 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
447 amfree(dumper->name);
450 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
451 cleanup_holdingdisk(hdp->diskdir, 0);
456 check_unfree_serial();
457 printf("driver: FINISHED time %s\n", walltime_str(curclock()));
459 log_add(L_FINISH,"date %s time %s", datestamp, walltime_str(curclock()));
463 amfree(dumper_program);
464 amfree(taper_program);
/* compare allocator usage with the baseline taken at startup and list any difference on stderr */
468 malloc_size_2 = malloc_inuse(&malloc_hist_2);
470 if(malloc_size_1 != malloc_size_2) {
471 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
483 unsigned int extra_tapes = 0;
484 if(!degraded_mode && !taper_busy && !empty(tapeq)) {
486 datestamp = sched(tapeq.head)->datestamp;
487 switch(conf_taperalgo) {
489 dp = dequeue_disk(&tapeq);
493 while (fit != NULL) {
494 extra_tapes = (fit->tape_splitsize > 0) ?
495 conf_runtapes - current_tape : 0;
496 if(sched(fit)->act_size <= (tape_left + tape_length*extra_tapes) &&
497 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
505 if(dp) remove_disk(&tapeq, dp);
508 fit = dp = tapeq.head;
509 while (fit != NULL) {
510 if(sched(fit)->act_size > sched(dp)->act_size &&
511 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
516 if(dp) remove_disk(&tapeq, dp);
518 case ALGO_LARGESTFIT:
520 while (fit != NULL) {
521 extra_tapes = (fit->tape_splitsize > 0) ?
522 conf_runtapes - current_tape : 0;
523 if(sched(fit)->act_size <= (tape_left + tape_length*extra_tapes) &&
524 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
525 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
530 if(dp) remove_disk(&tapeq, dp);
536 remove_disk(&tapeq, dp);
539 if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
540 if(conf_taperalgo != ALGO_SMALLEST) {
542 "driver: startaflush: Using SMALLEST because nothing fit\n");
544 fit = dp = tapeq.head;
545 while (fit != NULL) {
546 if(sched(fit)->act_size < sched(dp)->act_size &&
547 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
552 if(dp) remove_disk(&tapeq, dp);
554 if(taper_ev_read == NULL) {
555 taper_ev_read = event_register(taper, EV_READFD,
556 handle_taper_result, NULL);
561 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
562 sched(dp)->datestamp);
563 fprintf(stderr,"driver: startaflush: %s %s %s %ld %ld\n",
564 taperalgo2str(conf_taperalgo), dp->host->hostname,
565 dp->name, sched(taper_disk)->act_size, tape_left);
566 if(sched(dp)->act_size <= tape_left)
567 tape_left -= sched(dp)->act_size;
571 error("FATAL: Taper marked busy and no work found.");
574 } else if(!taper_busy && taper_ev_read != NULL) {
575 event_release(taper_ev_read);
576 taper_ev_read = NULL;
/*
 * client_constrained -- check whether disk dp is held back by its own
 * client: either the host already runs maxdumps dumps, or another disk of
 * the same host that is in progress shares dp's spindle.
 * NOTE(review): the return statements are not part of this listing; the
 * visible caller treats a non-zero result as "constrained".
 */
582 client_constrained(dp)
587 /* first, check if host is too busy */
589 if(dp->host->inprogress >= dp->host->maxdumps) {
593 /* next, check conflict with other dumps on same spindle */
595 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
599 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext) /* walk every disk of the same host */
600 if(dp2->inprogress && dp2->spindle == dp->spindle) {
612 disk_t *diskp, *delayed_diskp, *diskp_accept;
613 assignedhd_t **holdp=NULL, **holdp_accept;
614 const time_t now = time(NULL);
617 char *result_argv[MAX_ARGS+1];
623 idle_reason = IDLE_NO_DUMPERS;
626 if(dumpers_ev_time != NULL) {
627 event_release(dumpers_ev_time);
628 dumpers_ev_time = NULL;
631 for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
637 if (dumper->ev_read != NULL) {
638 /* assert(dumper->ev_read == NULL);*/
639 event_release(dumper->ev_read);
640 dumper->ev_read = NULL;
644 * A potential problem with starting from the bottom of the dump time
645 * distribution is that a slave host will have both one of the shortest
646 * and one of the longest disks, so starting its shortest disk first will
647 * tie up the host and eliminate its longest disk from consideration the
648 * first pass through. This could cause a big delay in starting that long
649 * disk, which could drag out the whole night's dumps.
651 * While starting from the top of the dump time distribution solves the
652 * above problem, this turns out to be a bad idea, because the big dumps
653 * will almost certainly pack the holding disk completely, leaving no
654 * room for even one small dump to start. This ends up shutting out the
655 * small-end dumpers completely (they stay idle).
657 * The introduction of multiple simultaneous dumps to one host alleviates
658 * the biggest&smallest dumps problem: both can be started at the
664 delayed_diskp = NULL;
668 dumporder = getconf_str(CNF_DUMPORDER);
669 if(strlen(dumporder) > (dumper-dmptable)) {
670 dumptype = dumporder[dumper-dmptable];
673 if(dumper-dmptable < 3)
679 for(diskp = rq->head; diskp != NULL; diskp = diskp->next) {
680 assert(diskp->host != NULL && sched(diskp) != NULL);
682 /* round estimate to next multiple of DISK_BLOCK_KB */
683 sched(diskp)->est_size = am_round(sched(diskp)->est_size,
686 if (diskp->host->start_t > now) {
687 cur_idle = max(cur_idle, IDLE_START_WAIT);
688 if (delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
689 delayed_diskp = diskp;
690 sleep_time = diskp->host->start_t;
692 } else if(diskp->start_t > now) {
693 cur_idle = max(cur_idle, IDLE_START_WAIT);
694 if (delayed_diskp == NULL || sleep_time > diskp->start_t) {
695 delayed_diskp = diskp;
696 sleep_time = diskp->start_t;
698 } else if (diskp->host->netif->curusage > 0 &&
699 sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
700 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
701 } else if(sched(diskp)->no_space) {
702 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
704 find_diskspace(sched(diskp)->est_size,&cur_idle,NULL)) == NULL) {
705 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
706 } else if (diskp->no_hold) {
707 free_assignedhd(holdp);
708 cur_idle = max(cur_idle, IDLE_NO_HOLD);
709 } else if (client_constrained(diskp)) {
710 free_assignedhd(holdp);
711 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
714 /* disk fits, dump it */
715 int accept = !diskp_accept;
718 case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
720 case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
722 case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
724 case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
726 case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
728 case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
730 default: log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
732 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
737 if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
738 if(holdp_accept) free_assignedhd(holdp_accept);
739 diskp_accept = diskp;
740 holdp_accept = holdp;
743 free_assignedhd(holdp);
747 free_assignedhd(holdp);
752 diskp = diskp_accept;
753 holdp = holdp_accept;
755 idle_reason = max(idle_reason, cur_idle);
758 * If we have no disk at this point, and there are disks that
759 * are delayed, then schedule a time event to call this dumper
760 * with the disk with the shortest delay.
762 if (diskp == NULL && delayed_diskp != NULL) {
763 assert(sleep_time > now);
765 dumpers_ev_time = event_register(sleep_time, EV_TIME,
766 handle_dumpers_time, &runq);
768 } else if (diskp != NULL) {
769 sched(diskp)->act_size = 0;
770 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
771 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
773 sched(diskp)->destname = newstralloc(sched(diskp)->destname,
774 sched(diskp)->holdp[0]->destname);
775 diskp->host->inprogress++; /* host is now busy */
776 diskp->inprogress = 1;
777 sched(diskp)->dumper = dumper;
778 sched(diskp)->timestamp = now;
780 dumper->ev_read = event_register(dumper->fd, EV_READFD,
781 handle_dumper_result, dumper);
782 dumper->busy = 1; /* dumper is now busy */
783 dumper->dp = diskp; /* link disk to dumper */
784 remove_disk(rq, diskp); /* take it off the run queue */
786 sched(diskp)->origsize = -1;
787 sched(diskp)->dumpsize = -1;
788 sched(diskp)->dumptime = -1;
789 sched(diskp)->tapetime = -1;
790 chunker = dumper->chunker;
791 chunker->result = LAST_TOK;
792 dumper->result = LAST_TOK;
793 startup_chunk_process(chunker,chunker_program);
794 chunker_cmd(chunker, START, (void *)datestamp);
795 chunker->dumper = dumper;
796 chunker_cmd(chunker, PORT_WRITE, diskp);
797 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
799 printf("driver: did not get PORT from %s for %s:%s\n",
800 chunker->name, diskp->host->hostname, diskp->name);
802 return ; /* fatal problem */
804 chunker->ev_read = event_register(chunker->fd, EV_READFD,
805 handle_chunker_result, chunker);
806 dumper->output_port = atoi(result_argv[2]);
808 dumper_cmd(dumper, PORT_DUMP, diskp);
810 diskp->host->start_t = now + 15;
811 } else if (/* cur_idle != NOT_IDLE && */
812 (num_busy_dumpers() > 0 || taper_busy)) {
814 * We are constrained.
821 * This gets called when a dumper is delayed for some reason. It may
822 * be because a disk has a delayed start, or amanda is constrained
823 * by network or disk limits.
/*
 * Timer callback.  cookie is the queue pointer that was handed to
 * event_register() together with this handler; the handler drops the
 * timer event and runs start_some_dumps() on that queue again.
 */
826 handle_dumpers_time(cookie)
829 disklist_t *runq = cookie; /* local name shadows the file-scope runq */
830 event_release(dumpers_ev_time); /* the timer has fired: release it ... */
831 dumpers_ev_time = NULL; /* ... and mark it as not registered */
832 start_some_dumps(runq);
/*
 * dump_schedule -- print the queue qp on stdout, one line per disk, framed
 * by dashed rulers.  str is a caption placed in the heading line.
 * Each line shows host name, disk name, level, estimated time, estimated
 * size and priority, all taken from the disk and its sched() record.
 */
836 dump_schedule(qp, str)
842 printf("dump of driver schedule %s:\n--------\n", str);
844 for(dp = qp->head; dp != NULL; dp = dp->next) { /* head-to-tail walk; the queue is not modified */
845 printf(" %-20s %-25s lv %d t %5ld s %8lu p %d\n",
846 dp->host->hostname, dp->name, sched(dp)->level,
847 sched(dp)->est_time, sched(dp)->est_size, sched(dp)->priority);
849 printf("--------\n");
/*
 * start_degraded_mode -- rework the schedule in *queuep for degraded mode.
 * Every disk is dequeued and examined:
 *   - entries whose level is not 0 are re-queued unchanged;
 *   - level-0 entries are kept while a space test involving reserved_space
 *     and the running total est_full_size passes (the right-hand side of
 *     that test is not part of this listing);
 *   - otherwise the entry is switched to its degr_* fallback values, or
 *     logged as L_FAIL when degr_level is -1.
 * The schedule is printed through dump_schedule() before and after.
 * NOTE(review): the statements that copy newq back and that set
 * degraded_mode are not shown here.
 */
853 start_degraded_mode(queuep)
858 unsigned long est_full_size;
860 if (taper_ev_read != NULL) { /* stop listening for taper results */
861 event_release(taper_ev_read);
862 taper_ev_read = NULL;
865 newq.head = newq.tail = 0; /* start with an empty replacement queue */
867 dump_schedule(queuep, "before start degraded mode");
870 while(!empty(*queuep)) {
871 dp = dequeue_disk(queuep);
873 if(sched(dp)->level != 0)
874 /* go ahead and do the disk as-is */
875 enqueue_disk(&newq, dp);
877 if (reserved_space + est_full_size + sched(dp)->est_size
879 enqueue_disk(&newq, dp);
880 est_full_size += sched(dp)->est_size; /* running total of the level-0 dumps kept so far */
882 else if(sched(dp)->degr_level != -1) { /* a fallback level exists: swap in the degr_* values */
883 sched(dp)->level = sched(dp)->degr_level;
884 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
885 sched(dp)->est_size = sched(dp)->degr_size;
886 sched(dp)->est_time = sched(dp)->degr_time;
887 sched(dp)->est_kps = sched(dp)->degr_kps;
888 enqueue_disk(&newq, dp);
891 log_add(L_FAIL,"%s %s %s %d [can't switch to incremental dump]",
892 dp->host->hostname, dp->name, sched(dp)->datestamp,
901 dump_schedule(queuep, "after start degraded mode");
/*
 * continue_port_dumps -- called after taper and dumper results.  It first
 * tries to hand more holding-disk space to the dumps parked in roomq and
 * sends CONTINUE to the chunker of each dump it can resume; it then looks
 * for the deadlock described in the long comment further down and, if one
 * is found, aborts one of the waiting dumps.
 * NOTE(review): several lines of the original are missing from this
 * listing, including the closing delimiters of the two long comments.
 */
905 static void continue_port_dumps()
909 int active_dumpers=0, busy_dumpers=0, i;
912 /* First we try to grant diskspace to some dumps waiting for it. */
913 for( dp = roomq.head; dp; dp = ndp ) {
915 /* find last holdingdisk used by this dump */
916 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ); /* empty body: stops with i at the last non-NULL entry */
917 /* find more space */
918 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
919 &active_dumpers, h[i] );
921 for(dumper = dmptable; dumper < dmptable + inparallel &&
922 dumper->dp != dp; dumper++); /* empty body: locate the dumper that is working on dp */
923 assert( dumper < dmptable + inparallel );
924 sched(dp)->activehd = assign_holdingdisk( h, dp );
925 chunker_cmd( dumper->chunker, CONTINUE, dp );
927 remove_disk( &roomq, dp );
931 /* So for some disks there is less holding diskspace available than
932 * was asked for. Possible reasons are
933 * a) diskspace has been allocated for other dumps which are
934 * still running or already being written to tape
935 * b) all other dumps have been suspended due to lack of diskspace
936 * c) this dump doesn't fit on all the holding disks
937 * Case a) is not a problem. We just wait for the diskspace to
938 * be freed by moving the current disk to a queue.
939 * If case b) occurs, we have a deadlock situation. We select
940 * a dump from the queue to be aborted and abort it. It will
941 * be retried later dumping to disk.
942 * If case c) is detected, the dump is aborted. Next time
943 * it will be dumped directly to tape. Actually, case c is a special
944 * manifestation of case b) where only one dumper is busy.
946 for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
949 if( !find_disk(&roomq, dumper->dp) ) {
952 sched(dp)->est_size > sched(dumper->dp)->est_size ) {
957 if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) &&
958 ((!taper_busy && empty(tapeq)) || degraded_mode) &&
959 pending_aborts == 0 ) { /* not case a */
960 if( busy_dumpers == 1 ) { /* case c */
961 sched(dp)->no_space = 1;
964 /* At this time, dp points to the dump with the smallest est_size.
965 * We abort that dump, hopefully not wasting too much time retrying it.
967 remove_disk( &roomq, dp );
968 chunker_cmd( sched(dp)->dumper->chunker, ABORT, NULL );
969 dumper_cmd( sched(dp)->dumper, ABORT, NULL );
/*
 * handle_taper_result -- read-event callback for the taper descriptor.
 * It is registered with a NULL cookie (asserted below).  Results are read
 * with getresult() and handled one at a time for as long as
 * areads_dataready(taper) reports more buffered data.
 *
 * In the visible cases result_argv[2] is the serial handle, which
 * serial2disk() maps back to the disk.
 *
 * NOTE(review): the switch statement, several break statements and some
 * comment delimiters are not part of this listing.
 */
976 handle_taper_result(void *cookie)
982 char *result_argv[MAX_ARGS+1];
985 assert(cookie == NULL);
991 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
996 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
997 if(result_argc != 5) {
998 error("error: [taper DONE result_argc != 5: %d", result_argc);
1001 dp = serial2disk(result_argv[2]);
1002 free_serial(result_argv[2]);
1004 filenum = atoi(result_argv[4]); /* <tape file> field */
1006 update_info_taper(dp, result_argv[3], filenum,
1010 delete_diskspace(dp);
1012 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1013 walltime_str(curclock()), dp->host->hostname, dp->name);
1016 amfree(sched(dp)->destname);
1017 amfree(sched(dp)->dumpdate);
1018 amfree(sched(dp)->degr_dumpdate);
1019 amfree(sched(dp)->datestamp);
1026 /* continue with those dumps waiting for diskspace */
1027 continue_port_dumps();
1030 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
1031 if (result_argc < 2) {
1032 error("error [taper TRYAGAIN result_argc < 2: %d]",
1035 dp = serial2disk(result_argv[2]);
1036 free_serial(result_argv[2]);
1037 printf("driver: taper-tryagain time %s disk %s:%s\n",
1038 walltime_str(curclock()), dp->host->hostname, dp->name);
1041 /* See how many tapes we have left, but we always
1042 retry once (why?) */
1044 if(dp->tape_splitsize > 0) /* only split dumps count the remaining tapes */
1045 avail_tapes = conf_runtapes - current_tape;
1049 if(sched(dp)->attempted > avail_tapes) { /* retry budget used up: report the failure */
1050 log_add(L_FAIL, "%s %s %s %d [too many taper retries]",
1051 dp->host->hostname, dp->name, sched(dp)->datestamp,
1053 printf("driver: taper failed %s %s %s, too many taper retry\n",
1054 result_argv[2], dp->host->hostname, dp->name);
1057 /* Re-insert into taper queue. */
1058 sched(dp)->attempted++;
1059 headqueue_disk(&tapeq, dp); /* goes back to the head of tapeq */
1062 tape_left = tape_length;
1064 /* run next thing from queue */
1069 continue_port_dumps();
1072 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
1073 if (result_argc != 3) {
1074 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]",
1079 case SPLIT_NEEDNEXT: /* SPLIT-NEEDNEXT <handle> <kb written> */
1080 if (result_argc != 3) {
1081 error("error [taper SPLIT_NEEDNEXT result_argc != 3: %d]",
1085 /* Update our tape counter and reset tape_left */
1087 tape_left = tape_length;
1089 /* Reduce the size of the dump by amount written and reduce
1090 tape_left by the amount left over */
1091 dp = serial2disk(result_argv[2]);
1092 sched(dp)->act_size -= atoi(result_argv[3]); /* <kb written> field */
1093 if (sched(dp)->act_size < tape_left)
1094 tape_left -= sched(dp)->act_size;
1100 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1101 dp = serial2disk(result_argv[2]);
1102 free_serial(result_argv[2]);
1103 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1104 walltime_str(curclock()), dp->host->hostname, dp->name);
1106 log_add(L_WARNING, "Taper error: %s", result_argv[3]);
1111 log_add(L_WARNING, "Taper protocol error");
1114 * Since we've gotten a taper error, we can't send anything more
1115 * to the taper. Go into degraded mode to try to get everthing
1116 * onto disk. Later, these dumps can be flushed to a new tape.
1117 * The tape queue is zapped so that it appears empty in future
1118 * checks. If there are dumps waiting for diskspace to be freed,
1123 "going into degraded mode because of taper component error.");
1124 start_degraded_mode(&runq);
1126 tapeq.head = tapeq.tail = NULL;
1129 if(taper_ev_read != NULL) {
1130 event_release(taper_ev_read);
1131 taper_ev_read = NULL;
1133 if(cmd != TAPE_ERROR) aclose(taper);
1134 continue_port_dumps();
1138 error("driver received unexpected token (%s) from taper",
1142 * Wakeup any dumpers that are sleeping because of network
1143 * or disk constraints.
1145 start_some_dumps(&runq);
1147 } while(areads_dataready(taper));
1155 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1156 if(!dumper->busy && !dumper->down) return dumper;
1168 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1169 if(dumper->busy) n += 1;
1181 assignedhd_t **h=NULL;
1182 int activehd, i, dummy;
1186 dumper = sched(dp)->dumper;
1187 chunker = dumper->chunker;
1191 h = sched(dp)->holdp;
1192 activehd = sched(dp)->activehd;
1194 if(dumper->result == DONE && chunker->result == DONE) {
1195 update_info_dumper(dp, sched(dp)->origsize,
1196 sched(dp)->dumpsize, sched(dp)->dumptime);
1199 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1201 is_partial = dumper->result != DONE || chunker->result != DONE;
1202 rename_tmp_holding(sched(dp)->destname, !is_partial);
1205 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1206 dummy += h[i]->used;
1209 size = size_holding_files(sched(dp)->destname, 0);
1210 h[activehd]->used = size - dummy;
1211 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1212 adjust_diskspace(dp, DONE);
1214 sched(dp)->attempted += 1;
1216 if((dumper->result != DONE || chunker->result != DONE) &&
1217 sched(dp)->attempted <= 1) {
1218 delete_diskspace(dp);
1219 enqueue_disk(&runq, dp);
1221 else if(size > DISK_BLOCK_KB) {
1222 sched(dp)->attempted = 0;
1223 enqueue_disk(&tapeq, dp);
1227 delete_diskspace(dp);
1231 dp->host->inprogress -= 1;
1234 waitpid(chunker->pid, NULL, 0 );
1235 aclose(chunker->fd);
1240 continue_port_dumps();
1242 * Wakeup any dumpers that are sleeping because of network
1243 * or disk constraints.
1245 start_some_dumps(&runq);
/*
 * handle_dumper_result -- read-event callback for a dumper descriptor.
 * cookie is the dumper_t the event was registered with.  Results are read
 * with getresult() for as long as areads_dataready(dumper->fd) reports
 * more buffered data; each visible case stores the received command in
 * dumper->result, and the outcome is then passed on to the dumper's
 * chunker as DONE or FAILED if that chunker is still up and has a
 * descriptor.
 *
 * NOTE(review): the switch statement, the assignment of dp and several
 * break statements are not part of this listing.
 */
1250 handle_dumper_result(cookie)
1253 /*static int pending_aborts = 0;*/
1254 dumper_t *dumper = cookie;
1258 char *result_argv[MAX_ARGS+1];
1260 assert(dumper != NULL);
1262 assert(dp != NULL && sched(dp) != NULL);
1268 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1271 /* result_argv[2] always contains the serial number */
1272 sdp = serial2disk(result_argv[2]);
1278 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
1279 if(result_argc != 6) {
1280 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1283 /*free_serial(result_argv[2]);*/
1285 sched(dp)->origsize = (long)atof(result_argv[3]); /* <origsize> field */
1286 sched(dp)->dumptime = (long)atof(result_argv[5]); /* <dumptime> field */
1288 printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1289 walltime_str(curclock()), dumper->name,
1290 dp->host->hostname, dp->name);
1293 dumper->result = cmd;
1297 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1299 * Requeue this disk, and fall through to the FAILED
1302 if(sched(dp)->attempted) { /* this disk was already attempted before: log the failure */
1303 log_add(L_FAIL, "%s %s %s %d [too many dumper retry: %s]",
1304 dp->host->hostname, dp->name, sched(dp)->datestamp,
1305 sched(dp)->level, result_argv[3]);
1306 printf("driver: dump failed %s %s %s, too many dumper retry: %s\n",
1307 result_argv[2], dp->host->hostname, dp->name,
1311 case FAILED: /* FAILED <handle> <errstr> */
1312 /*free_serial(result_argv[2]);*/
1313 dumper->result = cmd;
1316 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1318 * We sent an ABORT from the NO-ROOM case because this dump
1319 * wasn't going to fit onto the holding disk. We now need to
1320 * clean up the remains of this image, and try to finish
1321 * other dumps that are waiting on disk space.
1323 assert(pending_aborts);
1324 /*free_serial(result_argv[2]);*/
1325 dumper->result = cmd;
1329 /* either EOF or garbage from dumper. Turn it off */
1330 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1331 dumper->name, (long)dumper->pid);
1332 event_release(dumper->ev_read);
1333 dumper->ev_read = NULL;
1336 dumper->down = 1; /* mark it down so it isn't used again */
1338 /* if it was dumping something, zap it and try again */
1339 if(sched(dp)->attempted) {
1340 log_add(L_FAIL, "%s %s %s %d [%s died]",
1341 dp->host->hostname, dp->name, sched(dp)->datestamp,
1342 sched(dp)->level, dumper->name);
1345 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1346 dumper->name, dp->host->hostname, dp->name,
1350 dumper->result = cmd;
1356 /* send the dumper result to the chunker */
1357 if(dumper->chunker->down == 0 && dumper->chunker->fd != -1) {
1359 chunker_cmd(dumper->chunker, DONE, dp);
1362 chunker_cmd(dumper->chunker, FAILED, dp);
1366 if(dumper->result != LAST_TOK && dumper->chunker->result != LAST_TOK)
1369 } while(areads_dataready(dumper->fd));
/*
 * handle_chunker_result: process result lines coming from a chunker.
 *
 * cookie is the chunker_t whose fd is readable; the dumper paired with it
 * is taken from chunker->dumper.  Each result is read with getresult() and
 * dispatched on the returned command token.  The loop repeats for as long
 * as areads_dataready() reports more input on chunker->fd.
 *
 * NOTE(review): several lines of this function (declarations, braces,
 * break statements) are not visible in this excerpt; the comments below
 * describe only the statements that are shown.
 */
1374 handle_chunker_result(cookie)
1377 /*static int pending_aborts = 0;*/
1378 chunker_t *chunker = cookie;
1379 assignedhd_t **h=NULL;
1384 char *result_argv[MAX_ARGS+1];
1389 assert(chunker != NULL);
1390 dumper = chunker->dumper;
1391 assert(dumper != NULL);
1394 assert(sched(dp) != NULL);
1395 assert(sched(dp)->destname != NULL);
1396 assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
/* cache the holding disk list of this dump and the index of the active entry */
1398 if(dp && sched(dp) && sched(dp)->holdp) {
1399 h = sched(dp)->holdp;
1400 activehd = sched(dp)->activehd;
1407 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1410 /* result_argv[2] always contains the serial number */
1411 sdp = serial2disk(result_argv[2]);
/* PARTIAL and DONE share one body: exactly 4 result fields are required */
1417 case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
1418 case DONE: /* DONE <handle> <dumpsize> <errstr> */
1419 if(result_argc != 4) {
1420 error("error [chunker %s result_argc != 4: %d]", cmdstr[cmd],
1423 /*free_serial(result_argv[2]);*/
1425 sched(dp)->dumpsize = (long)atof(result_argv[3]);
1427 printf("driver: finished-cmd time %s %s chunked %s:%s\n",
1428 walltime_str(curclock()), chunker->name,
1429 dp->host->hostname, dp->name);
1432 event_release(chunker->ev_read);
1434 chunker->result = cmd;
1438 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1440 event_release(chunker->ev_read);
1443 case FAILED: /* FAILED <handle> <errstr> */
1444 /*free_serial(result_argv[2]);*/
1446 event_release(chunker->ev_read);
1448 chunker->result = cmd;
/*
 * NO-ROOM: the size reported in result_argv[3] is subtracted from the used
 * and reserved counters of the active holding disk entry, from the space
 * allocated on that holding disk and from its disksize.
 */
1452 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1453 assert( h && activehd >= 0 );
1454 h[activehd]->used -= atoi(result_argv[3]);
1455 h[activehd]->reserved -= atoi(result_argv[3]);
1456 holdalloc(h[activehd]->disk)->allocated_space -= atoi(result_argv[3]);
1457 h[activehd]->disk->disksize -= atoi(result_argv[3]);
/*
 * RQ-MORE-DISK: the active entry is marked fully used.  If another entry is
 * already assigned the chunker is told to CONTINUE on it; otherwise the
 * estimate is enlarged and find_diskspace() is asked for the difference.
 */
1460 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1461 assert( h && activehd >= 0 );
1462 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1463 h[activehd]->used = h[activehd]->reserved;
1464 if( h[++activehd] ) { /* There's still some allocated space left.
1465 * Tell the dumper about it. */
1466 sched(dp)->activehd++;
1467 chunker_cmd( chunker, CONTINUE, dp );
1468 } else { /* !h[++activehd] - must allocate more space */
1469 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1470 sched(dp)->est_size = (sched(dp)->act_size/20) * 21; /* +5% */
1471 sched(dp)->est_size = am_round(sched(dp)->est_size, DISK_BLOCK_KB);
1472 if(sched(dp)->est_size <= sched(dp)->act_size + DISK_BLOCK_KB)
1473 sched(dp)->est_size = sched(dp)->act_size +
1475 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1479 /* No diskspace available. The reason for this will be
1480 * determined in continue_port_dumps(). */
1481 enqueue_disk( &roomq, dp );
1482 continue_port_dumps();
1484 /* OK, allocate space for disk and have chunker continue */
1485 sched(dp)->activehd = assign_holdingdisk( h, dp );
1486 chunker_cmd( chunker, CONTINUE, dp );
1492 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1494 * We sent an ABORT from the NO-ROOM case because this dump
1495 * wasn't going to fit onto the holding disk. We now need to
1496 * clean up the remains of this image, and try to finish
1497 * other dumps that are waiting on disk space.
1499 /*assert(pending_aborts);*/
1501 /*free_serial(result_argv[2]);*/
1503 event_release(chunker->ev_read);
1505 chunker->result = cmd;
1510 /* either EOF or garbage from chunker. Turn it off */
1511 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1512 chunker->name, (long)chunker->pid);
1515 /* if it was dumping something, zap it and try again */
1516 assert( h && activehd >= 0 );
1517 if(sched(dp)->attempted) {
1518 log_add(L_FAIL, "%s %s %s %d [%s died]",
1519 dp->host->hostname, dp->name, sched(dp)->datestamp,
1520 sched(dp)->level, chunker->name);
1523 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1524 chunker->name, dp->host->hostname, dp->name,
1530 event_release(chunker->ev_read);
1532 chunker->result = cmd;
/*
 * Both the chunker and its dumper have reported a result once neither field
 * is LAST_TOK.  NOTE(review): the statement guarded by this test is not
 * visible in this excerpt.
 */
1540 if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
1543 } while(areads_dataready(chunker->fd));
/*
 * Flush-list reader.  The function header is not visible in this excerpt
 * (presumably read_flush -- TODO confirm).
 *
 * Lines are read from stdin with agets().  Apart from the ENDFLUSH command,
 * every line must have the form
 *     FLUSH <hostname> <diskname> <datestamp> <level> <filename>
 * and any syntax problem is reported through error().  The holding file
 * header is then loaded with get_dumpfile() and the entry is dropped (with
 * an L_INFO log line) when the file is not a dump file, when it does not
 * match the host, disk and datestamp given on the line, when the disk is
 * unknown to lookup_disk(), or when the dump level is outside 0..9.
 * Accepted entries get a fresh disk_t and sched_t and are appended to the
 * local queue tq.
 */
1554 char *hostname, *diskname, *datestamp;
1558 char *inpline = NULL;
1564 tq.head = tq.tail = NULL;
1566 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1572 skip_whitespace(s, ch); /* find the command */
1574 error("flush line %d: syntax error (no command)", line);
1578 skip_non_whitespace(s, ch);
1581 if(strcmp(command,"ENDFLUSH") == 0) {
1585 if(strcmp(command,"FLUSH") != 0) {
1586 error("flush line %d: syntax error (%s != FLUSH)", line, command);
1590 skip_whitespace(s, ch); /* find the hostname */
1592 error("flush line %d: syntax error (no hostname)", line);
1596 skip_non_whitespace(s, ch);
1599 skip_whitespace(s, ch); /* find the diskname */
1601 error("flush line %d: syntax error (no diskname)", line);
1605 skip_non_whitespace(s, ch);
1608 skip_whitespace(s, ch); /* find the datestamp */
1610 error("flush line %d: syntax error (no datestamp)", line);
1614 skip_non_whitespace(s, ch);
1617 skip_whitespace(s, ch); /* find the level number */
1618 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1619 error("flush line %d: syntax error (bad level)", line);
1622 skip_integer(s, ch);
1624 skip_whitespace(s, ch); /* find the filename */
1626 error("flush line %d: syntax error (no filename)", line);
1630 skip_non_whitespace(s, ch);
/* validate the holding file itself before trusting the FLUSH line */
1633 get_dumpfile(destname, &file);
1634 if( file.type != F_DUMPFILE) {
1635 if( file.type != F_CONT_DUMPFILE )
1636 log_add(L_INFO, "%s: ignoring cruft file.", destname);
1640 if(strcmp(hostname, file.name) != 0 ||
1641 strcmp(diskname, file.disk) != 0 ||
1642 strcmp(datestamp, file.datestamp) != 0) {
1643 log_add(L_INFO, "disk %s:%s not consistent with file %s",
1644 hostname, diskname, destname);
1648 dp = lookup_disk(file.name, file.disk);
1651 log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1652 destname, file.name, file.disk);
1656 if(file.dumplevel < 0 || file.dumplevel > 9) {
1657 log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1658 destname, file.dumplevel);
/* build a private disk_t for this holding file */
1662 dp1 = (disk_t *)alloc(sizeof(disk_t));
1664 dp1->next = dp1->prev = NULL;
1666 /* add it to the flushhost list */
1668 flushhost = alloc(sizeof(am_host_t));
1669 flushhost->next = NULL;
1670 flushhost->hostname = stralloc("FLUSHHOST");
1671 flushhost->up = NULL;
1672 flushhost->features = NULL;
1674 dp1->hostnext = flushhost->disks;
1675 flushhost->disks = dp1;
/* schedule record: level and datestamp come from the holding file header */
1677 sp = (sched_t *) alloc(sizeof(sched_t));
1678 sp->destname = stralloc(destname);
1679 sp->level = file.dumplevel;
1680 sp->dumpdate = NULL;
1681 sp->degr_dumpdate = NULL;
1682 sp->datestamp = stralloc(file.datestamp);
1686 sp->degr_level = -1;
1689 sp->act_size = size_holding_files(destname, 0);
1690 sp->holdp = build_diskspace(destname);
/*
 * NOTE(review): on this path sp and the strings allocated for it above are
 * not released in the visible code, and dp1 is already linked into
 * flushhost->disks -- confirm whether that is intended.
 */
1691 if(sp->holdp == NULL) continue;
1693 sp->timestamp = (time_t)0;
1695 dp1->up = (char *)sp;
1697 enqueue_disk(&tq, dp1);
/*
 * read_schedule: read the dump schedule from stdin and queue the disks.
 *
 * The schedule read event is released first.  Every stdin line must have
 * the form
 *     DUMP <host> <features> <disk> <datestamp> <priority> <level>
 *          <dumpdate> <size> <time>
 *          [<degr level> <degr dumpdate> <degr size> <degr time>]
 * and any syntax problem is reported through error().  Lines naming a disk
 * that lookup_disk() does not know are reported and ignored.  For each
 * accepted line a sched_t is filled in and attached to the disk, the host
 * feature set is parsed if it has not been set yet, and the disk is moved
 * from waitq to the run queue.  Finally start_degraded_mode() is called
 * when need_degraded is 1, followed by start_some_dumps().
 *
 * cookie is not referenced by any visible statement.
 *
 * NOTE(review): several lines of this function (declarations, braces,
 * guards around the kps divisions) are not visible in this excerpt.
 */
1705 read_schedule(cookie)
1711 int level, line, priority;
1712 char *dumpdate, *degr_dumpdate;
1714 long time, degr_time;
1715 unsigned long size, degr_size;
1716 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1720 long flush_size = 0;
1722 rq.head = rq.tail = NULL;
1724 event_release(schedule_ev_read);
1726 /* read schedule from stdin */
1728 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1734 skip_whitespace(s, ch); /* find the command */
1736 error("schedule line %d: syntax error (no command)", line);
1740 skip_non_whitespace(s, ch);
1743 if(strcmp(command,"DUMP") != 0) {
1744 error("schedule line %d: syntax error (%s != DUMP)", line, command);
1748 skip_whitespace(s, ch); /* find the host name */
1750 error("schedule line %d: syntax error (no host name)", line);
1754 skip_non_whitespace(s, ch);
1757 skip_whitespace(s, ch); /* find the feature list */
1759 error("schedule line %d: syntax error (no feature list)", line);
1763 skip_non_whitespace(s, ch);
1766 skip_whitespace(s, ch); /* find the disk name */
1768 error("schedule line %d: syntax error (no disk name)", line);
1772 skip_non_whitespace(s, ch);
1775 skip_whitespace(s, ch); /* find the datestamp */
1777 error("schedule line %d: syntax error (no datestamp)", line);
1781 skip_non_whitespace(s, ch);
1784 skip_whitespace(s, ch); /* find the priority number */
1785 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
1786 error("schedule line %d: syntax error (bad priority)", line);
1789 skip_integer(s, ch);
1791 skip_whitespace(s, ch); /* find the level number */
1792 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1793 error("schedule line %d: syntax error (bad level)", line);
1796 skip_integer(s, ch);
1798 skip_whitespace(s, ch); /* find the dump date */
1800 error("schedule line %d: syntax error (bad dump date)", line);
1804 skip_non_whitespace(s, ch);
1807 skip_whitespace(s, ch); /* find the size number */
1808 if(ch == '\0' || sscanf(s - 1, "%lu", &size) != 1) {
1809 error("schedule line %d: syntax error (bad size)", line);
1812 skip_integer(s, ch);
1814 skip_whitespace(s, ch); /* find the time number */
1815 if(ch == '\0' || sscanf(s - 1, "%ld", &time) != 1) {
1816 error("schedule line %d: syntax error (bad estimated time)", line);
1819 skip_integer(s, ch);
/* the four degraded-mode fields are optional and parsed as a group */
1821 degr_dumpdate = NULL; /* flag if degr fields found */
1822 skip_whitespace(s, ch); /* find the degr level number */
1824 if(sscanf(s - 1, "%d", &degr_level) != 1) {
1825 error("schedule line %d: syntax error (bad degr level)", line);
1828 skip_integer(s, ch);
1830 skip_whitespace(s, ch); /* find the degr dump date */
1832 error("schedule line %d: syntax error (bad degr dump date)", line);
1835 degr_dumpdate = s - 1;
1836 skip_non_whitespace(s, ch);
1839 skip_whitespace(s, ch); /* find the degr size number */
1840 if(ch == '\0' || sscanf(s - 1, "%lu", &degr_size) != 1) {
1841 error("schedule line %d: syntax error (bad degr size)", line);
1844 skip_integer(s, ch);
1846 skip_whitespace(s, ch); /* find the degr time number */
/* degr_time is a signed long, so it is scanned with %ld exactly like time */
1847 if(ch == '\0' || sscanf(s - 1, "%ld", &degr_time) != 1) {
1848 error("schedule line %d: syntax error (bad degr estimated time)", line);
1851 skip_integer(s, ch);
1854 dp = lookup_disk(hostname, diskname);
1857 "schedule line %d: %s:%s not in disklist, ignored",
1858 line, hostname, diskname);
/* build the schedule record for this disk */
1862 sp = (sched_t *) alloc(sizeof(sched_t));
1864 sp->dumpdate = stralloc(dumpdate);
1865 sp->est_size = DISK_BLOCK_KB + size; /* include header */
1866 sp->est_time = time;
1867 sp->priority = priority;
1868 sp->datestamp = stralloc(datestamp);
1871 sp->degr_level = degr_level;
1872 sp->degr_dumpdate = stralloc(degr_dumpdate);
1873 sp->degr_size = DISK_BLOCK_KB + degr_size;
1874 sp->degr_time = degr_time;
1876 sp->degr_level = -1;
1877 sp->degr_dumpdate = NULL;
1883 sp->est_kps = size/time;
1885 if(sp->degr_level != -1) {
1889 sp->degr_kps = degr_size/degr_time;
1897 sp->timestamp = (time_t)0;
1898 sp->destname = NULL;
1901 dp->up = (char *) sp;
/* the feature string is only parsed the first time a host is seen */
1902 if(dp->host->features == NULL) {
1903 dp->host->features = am_string_to_feature(features);
1905 remove_disk(&waitq, dp);
1906 enqueue_disk(&runq, dp);
1907 flush_size += sp->act_size;
1909 printf("driver: flush size %ld\n", flush_size);
1912 log_add(L_WARNING, "WARNING: got empty schedule from planner");
1913 if(need_degraded==1) start_degraded_mode(&runq);
1914 start_some_dumps(&runq);
/*
 * Free-bandwidth computation.  The function header is not visible in this
 * excerpt (presumably the body of free_kps -- TODO confirm).
 *
 * With a null interface pointer, maxusage and curusage are summed over
 * every interface in the list starting at lookup_interface(NULL);
 * otherwise only ip is considered.  In both cases the result is maxusage
 * minus curusage.
 */
1923 if (ip == (interface_t *)0) {
1927 for(p = lookup_interface(NULL); p != NULL; p = p->next) {
1928 maxusage += p->maxusage;
1929 curusage += p->curusage;
1931 res = maxusage - curusage;
1934 res = ip->maxusage - ip->curusage;
/*
 * interface_state: print one status line, prefixed with the time string
 * time_str, listing the name and free_kps() value of every interface in
 * the list starting at lookup_interface(NULL).
 */
1941 interface_state(time_str)
1946 printf("driver: interface-state time %s", time_str);
1948 for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
1949 printf(" if %s: free %d", ip->name, free_kps(ip));
/*
 * allocate_bandwidth: account for kps of bandwidth being taken on
 * interface ip by adding it to ip->curusage.
 */
1955 allocate_bandwidth(ip, kps)
1959 ip->curusage += kps;
/*
 * deallocate_bandwidth: give kps of bandwidth back to interface ip.
 * Releasing more than is currently in use trips the assertion.
 */
1963 deallocate_bandwidth(ip, kps)
1967 assert(kps <= ip->curusage);
1968 ip->curusage -= kps;
/*
 * Holding-disk free space.  The function name line is not visible in this
 * excerpt (presumably free_space -- TODO confirm).
 *
 * For every configured holding disk the difference between disksize and
 * the space already allocated on it is computed.  NOTE(review): the lines
 * that accumulate the difference into total_free and return it are not
 * shown here.
 */
1972 static unsigned long
1976 unsigned long total_free;
1980 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
1981 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
/*
 * find_diskspace: pick holding-disk space for a dump.
 *
 * size is rounded with am_round() to DISK_BLOCK_KB.  Holding disks are then
 * chosen one at a time until the request is covered or every disk has been
 * tried; each chosen disk contributes one assignedhd_t whose reserved field
 * includes one DISK_BLOCK_KB header per chunk.  pref, when non-null, names
 * an entry whose disk is tested first in the selection condition.
 * If part of the request is still uncovered afterwards, a message is
 * printed and the partial result is released with free_assignedhd().
 *
 * NOTE(review): the use of cur_idle and several statements of the selection
 * loop are not visible in this excerpt.
 */
1988 static assignedhd_t **
1989 find_diskspace(size, cur_idle, pref)
1993 /* We return an array of pointers to assignedhd_t. The array contains at
1994 * most one entry per holding disk. The list of pointers is terminated by
1995 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
1996 * how much diskspace to use on that disk. Later on, assign_holdingdisk
1997 * will allocate the given amount of space.
1998 * If there is not enough room on the holdingdisks, NULL is returned.
2002 assignedhd_t **result = NULL;
2003 holdingdisk_t *minp, *hdp;
2004 int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
2007 long halloc, dalloc, hfree, dfree;
2009 size = am_round(size, DISK_BLOCK_KB);
2012 printf("%s: want %lu K\n", debug_prefix_time(": find_diskspace"), size);
2016 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2020 used = alloc(sizeof(char) * num_holdingdisks);/*disks used during this run*/
2021 memset( used, 0, num_holdingdisks );
2022 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
2025 while( i < num_holdingdisks && size > 0 ) {
2026 /* find the holdingdisk with the fewest active dumpers and among
2027 * those the one with the biggest free space
2029 minp = NULL; minj = -1;
2030 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
2031 if( pref && pref->disk == hdp && !used[j] &&
2032 holdalloc(hdp)->allocated_space <= hdp->disksize - DISK_BLOCK_KB) {
2037 else if( holdalloc(hdp)->allocated_space <= hdp->disksize - 2*DISK_BLOCK_KB &&
2040 holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
2041 (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
2042 hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
2049 if( !minp ) { break; } /* all holding disks are full */
2052 /* hfree = free space on the disk */
2053 hfree = minp->disksize - holdalloc(minp)->allocated_space;
2055 /* dfree = free space for data, remove 1 header for each chunksize */
2056 dfree = hfree - (((hfree-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
2058 /* dalloc = space I can allocate for data */
2059 dalloc = ( dfree < size ) ? dfree : size;
2061 /* halloc = space to allocate, including 1 header for each chunksize */
2062 halloc = dalloc + (((dalloc-1)/minp->chunksize)+1) * DISK_BLOCK_KB;
2065 printf("%s: find diskspace: size %ld hf %ld df %ld da %ld ha %ld\n",
2066 debug_prefix_time(": find_diskspace"),
2067 size, hfree, dfree, dalloc, halloc);
/* record the chosen disk; nothing is marked as used on it yet */
2071 result[i] = alloc(sizeof(assignedhd_t));
2072 result[i]->disk = minp;
2073 result[i]->reserved = halloc;
2074 result[i]->used = 0;
2075 result[i]->destname = NULL;
2078 } /* while i < num_holdingdisks && size > 0 */
2081 if( size ) { /* not enough space available */
2082 printf("find diskspace: not enough diskspace. Left with %lu K\n", size);
2084 free_assignedhd(result);
/* debugging output: one line per selected holding disk */
2089 for( i = 0; result && result[i]; i++ ) {
2090 printf("%s: find diskspace: selected %s free %ld reserved %ld dumpers %d\n",
2091 debug_prefix_time(": find_diskspace"),
2092 result[i]->disk->diskdir,
2093 result[i]->disk->disksize - holdalloc(result[i]->disk)->allocated_space,
2094 result[i]->reserved,
2095 holdalloc(result[i]->disk)->allocated_dumpers);
/*
 * assign_holdingdisk: attach the holding-disk entries in holdp to the
 * schedule record of diskp.
 *
 * The existing sched(diskp)->holdp array is copied into a new array large
 * enough for the old and new entries.  When the dump already has entries
 * and its last one is on the same holding disk as holdp[0], the two are
 * merged by adding the reserved amounts.  Every remaining entry of holdp
 * gets a destination file name, is appended to the list, and its reserved
 * size is added to the allocated space of its holding disk.  Moved entries
 * are set to NULL in holdp so that the caller does not free them.
 *
 * NOTE(review): the return statement is not visible in this excerpt; the
 * caller stores the result in sched(dp)->activehd.
 */
2104 assign_holdingdisk(holdp, diskp)
2105 assignedhd_t **holdp;
2110 char *sfn = sanitise_filename(diskp->name);
2112 assignedhd_t **new_holdp;
2114 snprintf( lvl, sizeof(lvl), "%d", sched(diskp)->level );
2116 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
2119 for( c = 0; holdp[c]; c++ ); /* count number of disks */
2121 /* allocate memory for sched(diskp)->holdp */
2122 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++) {}
2123 new_holdp = (assignedhd_t **)alloc(sizeof(assignedhd_t*)*(j+c+1));
2124 if (sched(diskp)->holdp) {
2125 memcpy(new_holdp, sched(diskp)->holdp, j * sizeof(*new_holdp));
2126 amfree(sched(diskp)->holdp);
2128 sched(diskp)->holdp = new_holdp;
2132 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
2133 * merge assignedhd_t's */
2135 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
2136 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
2137 holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
2138 size = (holdp[0]->reserved>size) ? 0 : size-holdp[0]->reserved;
2140 printf("%s: merging holding disk %s to disk %s:%s, add %lu for reserved %lu, left %lu\n",
2141 debug_prefix_time(": assign_holdingdisk"),
2142 sched(diskp)->holdp[j-1]->disk->diskdir,
2143 diskp->host->hostname, diskp->name,
2144 holdp[0]->reserved, sched(diskp)->holdp[j-1]->reserved,
2154 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
2155 for( ; holdp[i]; i++ ) {
2156 holdp[i]->destname = newvstralloc( holdp[i]->destname,
2157 holdp[i]->disk->diskdir, "/",
2159 diskp->host->hostname, ".",
2162 sched(diskp)->holdp[j++] = holdp[i];
2163 holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
2164 size = (holdp[i]->reserved>size) ? 0 : size-holdp[i]->reserved;
2166 printf("%s: %d assigning holding disk %s to disk %s:%s, reserved %lu, left %lu\n",
2167 debug_prefix_time(": assign_holdingdisk"),
2168 i, holdp[i]->disk->diskdir, diskp->host->hostname, diskp->name,
2169 holdp[i]->reserved, size );
2172 holdp[i] = NULL; /* so it doesn't get free()d... */
/* keep the list NULL terminated */
2174 sched(diskp)->holdp[j] = NULL;
/*
 * adjust_diskspace: reconcile the reservations of a dump with the space it
 * actually used.
 *
 * For each holding-disk entry of diskp the difference between used and
 * reserved is applied to the allocated space of that holding disk and to
 * the reserved field of the entry, so that reserved ends up equal to used.
 * The sum of the used amounts becomes sched(diskp)->act_size.
 *
 * NOTE(review): cmd is not referenced by any line visible in this excerpt.
 */
2181 adjust_diskspace(diskp, cmd)
2185 assignedhd_t **holdp;
2186 unsigned long total=0;
2191 printf("%s: %s:%s %s\n",
2192 debug_prefix_time(": adjust_diskspace"),
2193 diskp->host->hostname, diskp->name, sched(diskp)->destname);
2197 holdp = sched(diskp)->holdp;
2201 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
2202 diff = holdp[i]->used - holdp[i]->reserved;
2203 total += holdp[i]->used;
2204 holdalloc(holdp[i]->disk)->allocated_space += diff;
2207 printf("%s: hdisk %s done, reserved %ld used %ld diff %ld alloc %ld dumpers %d\n",
2208 debug_prefix_time(": adjust_diskspace"),
2209 holdp[i]->disk->name, holdp[i]->reserved, holdp[i]->used, diff,
2210 holdalloc(holdp[i]->disk)->allocated_space,
2211 holdalloc(holdp[i]->disk)->allocated_dumpers );
/* after this, reserved equals used for the entry */
2214 holdp[i]->reserved += diff;
2217 sched(diskp)->act_size = total;
2220 printf("%s: after: disk %s:%s used %ld\n",
2221 debug_prefix_time(": adjust_diskspace"),
2222 diskp->host->hostname, diskp->name, sched(diskp)->act_size );
/*
 * delete_diskspace: drop the holding-disk image of diskp.
 *
 * The used amount of every holding-disk entry is subtracted from the
 * allocated space of its holding disk, the holding files are removed
 * starting from the first entry, the entry list is freed and act_size is
 * reset to 0.
 *
 * NOTE(review): the existing comment in the loop speaks of reserved sizes,
 * but the visible statement subtracts the used field -- confirm which is
 * intended.
 */
2229 delete_diskspace(diskp)
2232 assignedhd_t **holdp;
2235 holdp = sched(diskp)->holdp;
2239 for( i = 0; holdp[i]; i++ ) { /* for each disk */
2240 /* find all files of this dump on that disk, and subtract their
2241 * reserved sizes from the disk's allocated space
2243 holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
2246 unlink_holding_files(holdp[0]->destname); /* no need for the entire list,
2247 * because unlink_holding_files
2248 * will walk through all files
2249 * using cont_filename */
2250 free_assignedhd(sched(diskp)->holdp);
2251 sched(diskp)->holdp = NULL;
2252 sched(diskp)->act_size = 0;
/*
 * build_diskspace: reconstruct the holding-disk usage of an existing dump
 * image.
 *
 * Starting at destname, each file of the image is matched against the
 * configured holding-disk directories, its size in KB (rounded up) is added
 * to the counter of the matching disk, and its header is read and parsed to
 * obtain the name of the continuation file.  The walk ends when the name is
 * NULL or empty.  A result array with room for one entry per holding disk
 * plus a terminator is then filled, with reserved and used both set to the
 * accumulated size.
 *
 * NOTE(review): the error paths after the stat and open failures, and the
 * condition selecting which disks get an entry, are not visible in this
 * excerpt.
 */
2255 static assignedhd_t **build_diskspace(destname)
2261 char buffer[DISK_BLOCK_BYTES];
2263 assignedhd_t **result;
2266 int num_holdingdisks=0;
2267 char dirname[1000], *ch;
2269 char *filename = destname;
2271 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
/* used[] holds the KB found on each holding disk, indexed like the disk list */
2274 used = alloc(sizeof(int) * num_holdingdisks);
2275 for(i=0;i<num_holdingdisks;i++)
2277 result = alloc( sizeof(assignedhd_t *) * (num_holdingdisks+1) );
2279 while(filename != NULL && filename[0] != '\0') {
/*
 * NOTE(review): strncpy does not terminate dirname when filename has 999 or
 * more characters; confirm that a line not shown here adds the terminator.
 */
2280 strncpy(dirname, filename, 999);
2282 ch = strrchr(dirname,'/');
2284 ch = strrchr(dirname,'/');
2287 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2288 hdp = hdp->next, j++ ) {
2289 if(strcmp(dirname,hdp->diskdir)==0) {
2294 if(stat(filename, &finfo) == -1) {
2295 fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
/* file size in KB, rounded up */
2298 used[j] += (finfo.st_size+1023)/1024;
2299 if((fd = open(filename,O_RDONLY)) == -1) {
2300 fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2301 filename, strerror(errno));
2304 if ((buflen = fullread(fd, buffer, sizeof(buffer))) > 0) {;
2305 parse_file_header(buffer, &file, buflen);
/* follow the chain to the next chunk of the image */
2308 filename = file.cont_filename;
2311 for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2312 hdp = hdp->next, j++ ) {
2314 result[i] = alloc(sizeof(assignedhd_t));
2315 result[i]->disk = hdp;
2316 result[i]->reserved = used[j];
2317 result[i]->used = used[j];
2318 result[i]->destname = stralloc(destname);
/*
 * holdingdisk_state: print one status line, prefixed with the time string
 * time_str, giving for each configured holding disk its position in the
 * list, its free space (disksize minus allocated space) and the number of
 * dumpers allocated to it.
 */
2329 holdingdisk_state(time_str)
2336 printf("driver: hdisk-state time %s", time_str);
2338 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2339 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2340 printf(" hdisk %d: free %ld dumpers %d", dsk, diff,
2341 holdalloc(hdp)->allocated_dumpers);
/*
 * update_failed_dump_to_tape: record a failed direct-to-tape dump of dp in
 * the info database.
 *
 * update_info_dumper() is called with -1 for all three size and time
 * arguments while the schedule timestamp is temporarily forced to 0; the
 * original timestamp is restored afterwards.
 */
2347 update_failed_dump_to_tape(dp)
2351 * should simply set no_bump
2354 time_t save_timestamp = sched(dp)->timestamp;
2355 /* setting timestamp to 0 removes the current level from the
2356 * database, so that we ensure that it will not be bumped to the
2357 * next level on the next run. If we didn't do this, dumpdates or
2358 * gnutar-lists might have been updated already, and a bumped
2359 * incremental might be created. */
2360 sched(dp)->timestamp = 0;
2361 update_info_dumper(dp, -1, -1, -1);
2362 sched(dp)->timestamp = save_timestamp;
2365 /* ------------------- */
/*
 * Direct-to-tape dump.  The function header is not visible in this excerpt
 * (presumably dump_to_tape -- TODO confirm).
 *
 * Sequence shown by the visible statements:
 *   1. pick an idle dumper, returning 2 when none is available;
 *   2. send PORT_WRITE to the taper and read its answer, returning 2 when
 *      no port is obtained;
 *   3. send PORT_DUMP to the dumper with that port and mark dumper, taper
 *      and host as busy, allocating the estimated bandwidth;
 *   4. wait for the dumper result and classify it in the variable failed
 *      (1 or the attempt count for a retryable problem, 2 for a fatal one);
 *   5. wait for the taper result and either update the info database,
 *      requeue the disk at the head of runq, or enter degraded mode;
 *   6. clear the busy flags and release the bandwidth.
 *
 * NOTE(review): labels, braces, break statements and the final return are
 * not visible in this excerpt.
 */
2379 char *result_argv[MAX_ARGS+1];
2380 int dumper_tryagain = 0;
2382 printf("driver: dumping %s:%s directly to tape\n",
2383 dp->host->hostname, dp->name);
2386 /* pick a dumper and fail if there are no idle dumpers */
2388 dumper = idle_dumper();
2390 printf("driver: no idle dumpers for %s:%s.\n",
2391 dp->host->hostname, dp->name);
2393 log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2394 dp->host->hostname, dp->name);
2395 return 2; /* fatal problem */
2398 /* tell the taper to read from a port number of its choice */
2400 taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2401 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2403 printf("driver: did not get PORT from taper for %s:%s\n",
2404 dp->host->hostname, dp->name);
2406 return 2; /* fatal problem */
2408 /* copy port number */
2409 dumper->output_port = atoi(result_argv[2]);
2411 /* tell the dumper to dump to a port */
2413 dumper_cmd(dumper, PORT_DUMP, dp);
2414 dp->host->start_t = time(NULL) + 15;
2416 /* update statistics & print state */
2418 taper_busy = dumper->busy = 1;
2419 dp->host->inprogress += 1;
2421 sched(dp)->timestamp = time((time_t *)0);
2422 allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2423 idle_reason = NOT_IDLE;
2427 /* wait for result from dumper */
2429 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2432 free_serial(result_argv[2]);
2436 /* either eof or garbage from dumper */
2437 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2438 dumper->name, (long)dumper->pid);
2439 dumper->down = 1; /* mark it down so it isn't used again */
2440 failed = 1; /* dump failed, must still finish up with taper */
2443 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
2444 /* everything went fine */
2445 origsize = (long)atof(result_argv[3]);
2446 /*dumpsize = (long)atof(result_argv[4]);*/
2447 dumptime = (long)atof(result_argv[5]);
/* NO-ROOM: abort the dumper and wait for its ABORT-FINISHED reply */
2450 case NO_ROOM: /* NO-ROOM <handle> */
2451 dumper_cmd(dumper, ABORT, dp);
2452 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2454 free_serial(result_argv[2]);
2455 assert(cmd == ABORT_FINISHED);
2457 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2459 /* dump failed, but we must still finish up with taper */
2460 /* problem with dump, possibly nonfatal, retry one time */
2461 sched(dp)->attempted++;
2462 failed = sched(dp)->attempted;
2463 dumper_tryagain = 1;
2466 case FAILED: /* FAILED <handle> <errstr> */
2467 /* dump failed, but we must still finish up with taper */
2468 failed = 2; /* fatal problem with dump */
2473 * Note that at this point, even if the dump above failed, it may
2474 * not be a fatal failure if taper below says we can try again.
2475 * E.g. a dumper failure above may actually be the result of a
2476 * tape overflow, which in turn causes dump to see "broken pipe",
2477 * "no space on device", etc., since taper closed the port first.
2482 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2486 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
2487 if(result_argc != 5) {
2488 error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
/* the taper finished, but the outcome still depends on the dumper result */
2491 if(failed == 1) goto tryagain; /* dump didn't work */
2492 else if(failed == 2) goto failed_dumper;
2494 free_serial(result_argv[2]);
/* tape time and dump size are parsed out of the taper message field */
2496 sscanf(result_argv[5],"[sec %f kb %ld ", &tapetime, &dumpsize);
2499 /* every thing went fine */
2500 update_info_dumper(dp, origsize, dumpsize, dumptime);
2501 filenum = atoi(result_argv[4]);
2502 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2503 /* note that update_info_dumper() must be run before
2504 update_info_taper(), since update_info_dumper overwrites
2505 tape information. */
2510 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
2511 tape_left = tape_length;
/* count the attempt here only if the dumper result did not already do so */
2513 if(dumper_tryagain == 0) {
2514 sched(dp)->attempted++;
2515 if(sched(dp)->attempted > failed)
2516 failed = sched(dp)->attempted;
2520 headqueue_disk(&runq, dp);
2522 update_failed_dump_to_tape(dp);
2523 free_serial(result_argv[2]);
2526 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
2527 if (result_argc != 3) {
2528 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]", result_argc);
2530 fprintf(stderr, "driver: Got SPLIT_CONTINUE %s %s\n", result_argv[2], result_argv[3]);
2532 goto continue_port_dump;
2534 case SPLIT_NEEDNEXT:
2535 fprintf(stderr, "driver: Got SPLIT_NEEDNEXT %s %s\n", result_argv[2], result_argv[3]);
2537 goto continue_port_dump;
2539 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2542 update_failed_dump_to_tape(dp);
2543 free_serial(result_argv[2]);
2544 failed = 2; /* fatal problem */
2545 start_degraded_mode(&runq);
2548 /* reset statistics & return */
2550 taper_busy = dumper->busy = 0;
2551 dp->host->inprogress -= 1;
2553 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
/*
 * Queue length loop.  The function header is not visible in this excerpt
 * (presumably queue_length -- TODO confirm).  The empty-bodied loop walks
 * the list from q.head through the next pointers and leaves the number of
 * elements in len.
 */
2565 for(len = 0, p = q.head; p != NULL; len++, p = p->next);
/*
 * One-line driver state report.  The function header is not visible in this
 * excerpt (presumably short_dump_state -- TODO confirm).
 *
 * Prints the current wall time, the free bandwidth over all interfaces, the
 * free holding-disk space, the taper state (DOWN, idle or writing), the
 * number of non-busy dumpers, the lengths of tapeq, runq and roomq, the
 * wakeup interval and the idle reason.  The interface and holding-disk
 * state lines follow, stamped with the same time string.
 */
2575 wall_time = walltime_str(curclock());
2577 printf("driver: state time %s ", wall_time);
2578 printf("free kps: %d space: %lu taper: ",
2579 free_kps((interface_t *)0), free_space());
2580 if(degraded_mode) printf("DOWN");
2581 else if(!taper_busy) printf("idle");
2582 else printf("writing");
/* count the dumpers that are not busy */
2584 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2585 printf(" idle-dumpers: %d", nidle);
2586 printf(" qlen tapeq: %d", queue_length(tapeq));
2587 printf(" runq: %d", queue_length(runq));
2588 printf(" roomq: %d", queue_length(roomq));
2589 printf(" wakeup: %d", (int)sleep_time);
2590 printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2591 interface_state(wall_time);
2592 holdingdisk_state(wall_time);
2604 printf("================\n");
2605 printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
2606 printf("free kps: %d, space: %lu\n", free_kps((interface_t *)0), free_space());
2607 if(degraded_mode) printf("taper: DOWN\n");
2608 else if(!taper_busy) printf("taper: idle\n");
2609 else printf("taper: writing %s:%s.%d est size %lu\n",
2610 taper_disk->host->hostname, taper_disk->name,
2611 sched(taper_disk)->level,
2612 sched(taper_disk)->est_size);
2613 for(i = 0; i < inparallel; i++) {
2614 dp = dmptable[i].dp;
2615 if(!dmptable[i].busy)
2616 printf("%s: idle\n", dmptable[i].name);
2618 printf("%s: dumping %s:%s.%d est kps %d size %lu time %ld\n",
2619 dmptable[i].name, dp->host->hostname, dp->name, sched(dp)->level,
2620 sched(dp)->est_kps, sched(dp)->est_size, sched(dp)->est_time);
2622 dump_queue("TAPE", tapeq, 5, stdout);
2623 dump_queue("ROOM", roomq, 5, stdout);
2624 dump_queue("RUN ", runq, 5, stdout);
2625 printf("================\n");