2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: driver.c,v 1.198 2006/08/24 01:57:16 paddy_s Exp $
29 * controlling process for the Amanda backup system
33 * XXX possibly modify tape queue to be cognizant of how much room is left on
34 * tape. Probably not effective though, should do this in planner.
50 #include "server_util.h"
52 static disklist_t waitq, runq, tapeq, roomq; /* disk queues: runq = dumps still to start, tapeq = dumps waiting for the taper, roomq = dumps waiting for holding-disk space */
53 static int pending_aborts; /* must be 0 before continue_port_dumps() will abort another dump */
54 static disk_t *taper_disk; /* disk whose act_size startaflush() logs; presumably the disk currently on the taper -- TODO confirm */
55 static int degraded_mode; /* nonzero in degraded mode: nothing is sent to the taper, dumps go to holding disk */
56 static off_t reserved_space; /* holding-disk space reserved for degraded-mode dumps */
57 static off_t total_disksize; /* sum of the usable sizes of all holding disks */
58 static char *dumper_program; /* path of the dumper executable under libexecdir */
59 static char *chunker_program; /* path of the chunker executable under libexecdir */
60 static int inparallel; /* number of dumpers (CNF_INPARALLEL, capped at MAX_DUMPERS) */
61 static int nodump = 0; /* when nonzero, main() does not start the dumper processes */
62 static off_t tape_length = (off_t)0; /* length of the configured tapetype */
63 static off_t tape_left = (off_t)0; /* estimated space remaining on the current tape */
64 static int current_tape = 1; /* tape counter; conf_runtapes - current_tape = extra tapes still available */
65 static int conf_taperalgo; /* CNF_TAPERALGO: how startaflush() picks from tapeq */
66 static int conf_runtapes; /* CNF_RUNTAPES; the taper is only started when > 0 */
67 static time_t sleep_time; /* earliest start_t among delayed disks, used for the dumpers timer */
68 static int idle_reason; /* IDLE_* code recorded by start_some_dumps() */
69 static char *driver_timestamp; /* run timestamp taken from the planner's DATE line */
70 static char *hd_driver_timestamp; /* timestamp used to name the per-run holding directories */
71 static am_host_t *flushhost = NULL;
72 static int need_degraded=0; /* when nonzero, main() skips the initial startaflush() */
74 static event_handle_t *dumpers_ev_time = NULL; /* timer event that re-runs start_some_dumps() */
75 static event_handle_t *schedule_ev_read = NULL; /* read event on fd 0 handled by read_schedule() */
77 static int wait_children(int count); /* wait up to count seconds for children to exit; returns 0 when none remain, 1 otherwise */
78 static void wait_for_children(void); /* atexit handler: sends QUIT, then SIGHUP, then SIGKILL */
79 static void allocate_bandwidth(interface_t *ip, unsigned long kps);
80 static int assign_holdingdisk(assignedhd_t **holdp, disk_t *diskp);
81 static void adjust_diskspace(disk_t *diskp, cmd_t cmd);
82 static void delete_diskspace(disk_t *diskp);
83 static assignedhd_t **build_diskspace(char *destname);
84 static int client_constrained(disk_t *dp); /* checks the host's maxdumps limit and same-spindle conflicts */
85 static void deallocate_bandwidth(interface_t *ip, unsigned long kps);
86 static void dump_schedule(disklist_t *qp, char *str); /* prints queue qp to stdout, labelled with str */
87 static int dump_to_tape(disk_t *dp);
88 static assignedhd_t **find_diskspace(off_t size, int *cur_idle,
89 assignedhd_t *preferred);
90 static unsigned long free_kps(interface_t *ip);
91 static off_t free_space(void);
92 static void dumper_result(disk_t *dp);
93 static void handle_dumper_result(void *); /* read callback registered on a dumper's fd */
94 static void handle_chunker_result(void *); /* read callback registered on a chunker's fd */
95 static void handle_dumpers_time(void *); /* timer callback: releases the timer and calls start_some_dumps() */
96 static void handle_taper_result(void *); /* read callback for results from the taper */
97 static void holdingdisk_state(char *time_str);
98 static dumper_t *idle_dumper(void); /* returns a dumper that is neither busy nor down */
99 static void interface_state(char *time_str);
100 static int queue_length(disklist_t q);
101 static disklist_t read_flush(void); /* its result becomes the initial tapeq */
102 static void read_schedule(void *cookie); /* read callback registered on fd 0 */
103 static void short_dump_state(void);
104 static void startaflush(void); /* picks a tapeq entry per conf_taperalgo and sends FILE_WRITE to the taper */
105 static void start_degraded_mode(disklist_t *queuep); /* rebuilds the queue for degraded mode, switching level-0 dumps that do not fit to their degraded level */
106 static void start_some_dumps(disklist_t *rq); /* tries to start dumps from rq on the dumpers */
107 static void continue_port_dumps(void); /* grants holding-disk space to dumps in roomq, or aborts one on deadlock */
108 static void update_failed_dump_to_tape(disk_t *);
110 static void dump_state(const char *str);
112 int main(int main_argc, char **main_argv);
114 static const char *idle_strings[] = {
117 #define IDLE_NO_DUMPERS 1
119 #define IDLE_START_WAIT 2
121 #define IDLE_NO_HOLD 3
123 #define IDLE_CLIENT_CONSTRAINED 4
124 "client-constrained",
125 #define IDLE_NO_DISKSPACE 5
127 #define IDLE_TOO_LARGE 6
129 #define IDLE_NO_BANDWIDTH 7
131 #define IDLE_TAPER_WAIT 8
145 generic_fs_stats_t fs;
147 unsigned long malloc_hist_1, malloc_size_1;
148 unsigned long malloc_hist_2, malloc_size_2;
149 unsigned long reserve = 100;
154 char *result_argv[MAX_ARGS+1];
159 int new_argc, my_argc;
160 char **new_argv, **my_argv;
164 setvbuf(stdout, (char *)NULL, (int)_IOLBF, 0);
165 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
169 dbopen(DBG_SUBDIR_SERVER);
171 atexit(wait_for_children);
173 /* Don't die when child closes pipe */
174 signal(SIGPIPE, SIG_IGN);
176 malloc_size_1 = malloc_inuse(&malloc_hist_1);
178 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
179 set_logerror(logerror);
183 parse_server_conf(main_argc, main_argv, &new_argc, &new_argv);
187 printf("%s: pid %ld executable %s version %s\n",
188 get_pname(), (long) getpid(), my_argv[0], version());
191 config_name = stralloc(my_argv[1]);
192 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
194 if(strncmp(my_argv[2], "nodump", 6) == 0) {
201 char my_cwd[STR_SIZE];
203 if (getcwd(my_cwd, SIZEOF(my_cwd)) == NULL) {
204 error("cannot determine current working directory");
207 config_dir = stralloc2(my_cwd, "/");
208 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
209 config_name = stralloc(config_name + 1);
215 conffile = stralloc2(config_dir, CONFFILE_NAME);
216 if(read_conffile(conffile)) {
217 error("errors processing config file \"%s\"", conffile);
222 dbrename(config_name, DBG_SUBDIR_SERVER);
224 report_bad_conf_arg();
226 amfree(driver_timestamp);
227 /* read timestamp from stdin */
228 while ((line = agets(stdin)) != NULL) {
233 if ( line == NULL ) {
234 error("Did not get DATE line from planner");
237 driver_timestamp = alloc(15);
238 strncpy(driver_timestamp, &line[5], 14);
239 driver_timestamp[14] = '\0';
241 log_add(L_START,"date %s", driver_timestamp);
243 /* warn if this is not the first dump run today while usetimestamps is off */
244 if(strlen(driver_timestamp) == 8) {
245 char *conf_logdir = getconf_str(CNF_LOGDIR);
246 char *logfile = vstralloc(conf_logdir, "/log.",
247 driver_timestamp, ".0", NULL);
248 char *oldlogfile = vstralloc(conf_logdir, "/oldlog/log.",
249 driver_timestamp, ".0", NULL);
250 if(access(logfile, F_OK) == 0 || access(oldlogfile, F_OK) == 0) {
251 log_add(L_WARNING, "WARNING: This is not the first amdump run today. Enable the usetimestamps option in the configuration file if you want to run amdump more than once per calendar day.");
255 hd_driver_timestamp = construct_timestamp(NULL);
258 hd_driver_timestamp = stralloc(driver_timestamp);
261 taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
262 dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
264 chunker_program = vstralloc(libexecdir, "/", "chunker", versionsuffix(),
267 conf_taperalgo = getconf_taperalgo(CNF_TAPERALGO);
268 conf_tapetype = getconf_str(CNF_TAPETYPE);
269 conf_runtapes = getconf_int(CNF_RUNTAPES);
270 tape = lookup_tapetype(conf_tapetype);
271 tape_length = tapetype_get_length(tape);
272 printf("driver: tape size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)tape_length);
274 /* start initializing: read in databases */
276 conf_diskfile = getconf_str(CNF_DISKFILE);
277 if (*conf_diskfile == '/') {
278 conf_diskfile = stralloc(conf_diskfile);
280 conf_diskfile = stralloc2(config_dir, conf_diskfile);
282 if (read_diskfile(conf_diskfile, &origq) < 0) {
283 error("could not load disklist \"%s\"", conf_diskfile);
286 amfree(conf_diskfile);
288 /* set up any configuration-dependent variables */
290 inparallel = getconf_int(CNF_INPARALLEL);
292 reserve = (unsigned long)getconf_int(CNF_RESERVE);
294 total_disksize = (off_t)0;
295 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
296 hdp->up = (void *)alloc(SIZEOF(holdalloc_t));
297 holdalloc(hdp)->allocated_dumpers = 0;
298 holdalloc(hdp)->allocated_space = (off_t)0;
300 if(get_fs_stats(holdingdisk_get_diskdir(hdp), &fs) == -1
301 || access(holdingdisk_get_diskdir(hdp), W_OK) == -1) {
302 log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
303 holdingdisk_get_diskdir(hdp), strerror(errno));
308 if(fs.avail != (off_t)-1) {
309 if(hdp->disksize > (off_t)0) {
310 if(hdp->disksize > fs.avail) {
312 "WARNING: %s: " OFF_T_FMT " KB requested, "
313 "but only " OFF_T_FMT " KB available.",
314 holdingdisk_get_diskdir(hdp),
315 (OFF_T_FMT_TYPE)hdp->disksize,
316 (OFF_T_FMT_TYPE)fs.avail);
317 hdp->disksize = fs.avail;
320 else if((fs.avail + hdp->disksize) < (off_t)0) {
322 "WARNING: %s: not " OFF_T_FMT " KB free.",
323 holdingdisk_get_diskdir(hdp), -hdp->disksize);
324 hdp->disksize = (off_t)0;
328 hdp->disksize += fs.avail;
331 printf("driver: adding holding disk %d dir %s size "
332 OFF_T_FMT " chunksize " OFF_T_FMT "\n",
333 dsk, holdingdisk_get_diskdir(hdp),
334 (OFF_T_FMT_TYPE)hdp->disksize,
335 (OFF_T_FMT_TYPE)(holdingdisk_get_chunksize(hdp)));
337 newdir = newvstralloc(newdir,
338 holdingdisk_get_diskdir(hdp), "/", hd_driver_timestamp,
340 if(!mkholdingdir(newdir)) {
341 hdp->disksize = (off_t)0;
343 total_disksize += hdp->disksize;
346 reserved_space = (total_disksize / (off_t)100) * (off_t)reserve + ((total_disksize % (off_t)100) * (off_t)reserve) / (off_t)100; /* reserve is a percentage of total_disksize; the former (reserve / 100) integer division truncated every value below 100 to 0. Scaling quotient and remainder separately gives the exact floor without a large intermediate product. */
348 printf("reserving " OFF_T_FMT " out of " OFF_T_FMT
349 " for degraded-mode dumps\n",
350 (OFF_T_FMT_TYPE)reserved_space, (OFF_T_FMT_TYPE)free_space());
354 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
356 /* taper takes a while to get going, so start it up right away */
359 if(conf_runtapes > 0) {
360 startup_tape_process(taper_program);
361 taper_cmd(START_TAPER, driver_timestamp, NULL, 0, NULL);
364 /* fire up the dumpers now while we are waiting */
365 if(!nodump) startup_dump_processes(dumper_program, inparallel, driver_timestamp);
368 * Read schedule from stdin. Usually, this is a pipe from planner,
369 * so the effect is that we wait here for the planner to
370 * finish, but meanwhile the taper is rewinding the tape, reading
371 * the label, checking it, writing a new label and all that jazz
372 * in parallel with the planner.
378 tapeq = read_flush();
380 roomq.head = roomq.tail = NULL;
382 log_add(L_STATS, "startup time %s", walltime_str(curclock()));
384 printf("driver: start time %s inparallel %d bandwidth %lu diskspace "
385 OFF_T_FMT " ", walltime_str(curclock()), inparallel,
386 free_kps((interface_t *)0), (OFF_T_FMT_TYPE)free_space());
387 printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
388 "OBSOLETE", driver_timestamp, taperalgo2str(conf_taperalgo),
389 getconf_str(CNF_DUMPORDER));
392 /* ok, planner is done, now let's see if the tape is ready */
394 if(conf_runtapes > 0) {
395 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
397 if(cmd != TAPER_OK) {
398 /* no tape, go into degraded mode: dump to holding disk */
406 tape_left = tape_length;
409 taper_ev_read = NULL;
410 if(!need_degraded) startaflush();
413 schedule_ev_read = event_register((event_id_t)0, EV_READFD, read_schedule, NULL);
418 /* handle any remaining dumps by dumping directly to tape, if possible */
420 while(!empty(runq) && taper > 0) {
421 diskp = dequeue_disk(&runq);
422 if (diskp->to_holdingdisk == HOLD_REQUIRED) {
423 log_add(L_FAIL, "%s %s %s %d [%s]",
424 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
426 "can't dump required holdingdisk");
428 else if (!degraded_mode) {
429 int rc = dump_to_tape(diskp);
432 "%s %s %d [dump to tape failed, will try again]",
433 diskp->host->hostname,
435 sched(diskp)->level);
437 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
438 diskp->host->hostname,
440 sched(diskp)->datestamp,
441 sched(diskp)->level);
444 log_add(L_FAIL, "%s %s %s %d [%s]",
445 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
447 diskp->to_holdingdisk == HOLD_AUTO ?
448 "no more holding disk space" :
449 "can't dump no-hold disk in degraded mode");
452 short_dump_state(); /* for amstatus */
454 printf("driver: QUITTING time %s telling children to quit\n",
455 walltime_str(curclock()));
459 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
461 dumper_cmd(dumper, QUIT, NULL);
466 taper_cmd(QUIT, NULL, NULL, 0, NULL);
469 /* wait for all to die */
472 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
473 cleanup_holdingdisk(holdingdisk_get_diskdir(hdp), 0);
478 check_unfree_serial();
479 printf("driver: FINISHED time %s\n", walltime_str(curclock()));
481 log_add(L_FINISH,"date %s time %s", driver_timestamp, walltime_str(curclock()));
482 amfree(driver_timestamp);
484 free_new_argv(new_argc, new_argv);
485 amfree(dumper_program);
486 amfree(taper_program);
490 malloc_size_2 = malloc_inuse(&malloc_hist_2);
492 if(malloc_size_1 != malloc_size_2) {
493 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
501 /* sleep up to count seconds, and wait for terminating child processes */
502 /* if count is negative, this function will not time out */
503 /* exit once all child processes are finished or the timeout has expired */
504 /* return 0 if there are no more children to wait for */
505 /* return 1 if some children are still alive */
507 wait_children(int count)
519 pid = waitpid((pid_t)-1, &retstat, WNOHANG);
523 if (! WIFEXITED(retstat)) {
525 code = WTERMSIG(retstat);
526 } else if (WEXITSTATUS(retstat) != 0) {
528 code = WEXITSTATUS(retstat);
531 for (dumper = dmptable; dumper < dmptable + inparallel;
533 if (pid == dumper->pid) {
534 who = stralloc(dumper->name);
538 if (pid == dumper->chunker->pid) {
539 who = stralloc(dumper->chunker->name);
540 dumper->chunker->pid = -1;
544 if (who == NULL && pid == taper_pid) {
545 who = stralloc("taper");
548 if(what != NULL && who == NULL) {
549 who = stralloc("unknown");
552 log_add(L_WARNING, "%s pid %u exited with %s %d\n", who,
553 (unsigned)pid, what, code);
554 printf("driver: %s pid %u exited with %s %d\n", who,
555 (unsigned)pid, what, code);
559 } while (pid > 0 || wait_errno == EINTR);
564 } while ((errno != ECHILD) && (count != 0));
565 return (errno != ECHILD);
569 kill_children(int signal)
574 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
575 if (!dumper->down && dumper->pid > 1) {
576 printf("driver: sending signal %d to %s pid %u\n", signal,
577 dumper->name, (unsigned)dumper->pid);
578 if (kill(dumper->pid, signal) == -1 && errno == ESRCH) {
580 dumper->chunker->pid = 0;
582 if (dumper->chunker && dumper->chunker->pid > 1) {
583 printf("driver: sending signal %d to %s pid %u\n", signal,
584 dumper->chunker->name,
585 (unsigned)dumper->chunker->pid);
586 if (kill(dumper->chunker->pid, signal) == -1 &&
588 dumper->chunker->pid = 0;
595 printf("driver: sending signal %d to %s pid %u\n", signal,
596 "taper", (unsigned)taper_pid);
597 if (kill(taper_pid, signal) == -1 && errno == ESRCH)
602 wait_for_children(void)
607 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
608 if (dumper->pid > 1 && dumper->fd >= 0) {
609 dumper_cmd(dumper, QUIT, NULL);
610 if (dumper->chunker && dumper->chunker->pid > 1 &&
611 dumper->chunker->fd >= 0)
612 chunker_cmd(dumper->chunker, QUIT, NULL);
617 if(taper_pid > 1 && taper > 0) {
618 taper_cmd(QUIT, NULL, NULL, 0, NULL);
621 if(wait_children(60) == 0)
624 kill_children(SIGHUP);
625 if(wait_children(60) == 0)
628 kill_children(SIGKILL);
629 if(wait_children(-1) == 0)
643 if(!degraded_mode && !taper_busy && !empty(tapeq)) {
645 datestamp = sched(tapeq.head)->datestamp;
646 switch(conf_taperalgo) {
648 dp = dequeue_disk(&tapeq);
652 while (fit != NULL) {
653 extra_tapes = (fit->tape_splitsize > (off_t)0) ?
654 conf_runtapes - current_tape : 0;
655 if(sched(fit)->act_size <= (tape_left +
656 tape_length * (off_t)extra_tapes) &&
657 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
665 if(dp) remove_disk(&tapeq, dp);
668 fit = dp = tapeq.head;
669 while (fit != NULL) {
670 if(sched(fit)->act_size > sched(dp)->act_size &&
671 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
676 if(dp) remove_disk(&tapeq, dp);
678 case ALGO_LARGESTFIT:
680 while (fit != NULL) {
681 extra_tapes = (fit->tape_splitsize > (off_t)0) ?
682 conf_runtapes - current_tape : 0;
683 if(sched(fit)->act_size <=
684 (tape_left + tape_length * (off_t)extra_tapes) &&
685 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
686 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
691 if(dp) remove_disk(&tapeq, dp);
697 remove_disk(&tapeq, dp);
700 if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
701 if(conf_taperalgo != ALGO_SMALLEST) {
703 "driver: startaflush: Using SMALLEST because nothing fit\n");
705 fit = dp = tapeq.head;
706 while (fit != NULL) {
707 if(sched(fit)->act_size < sched(dp)->act_size &&
708 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
713 if(dp) remove_disk(&tapeq, dp);
715 if(taper_ev_read == NULL) {
716 taper_ev_read = event_register((event_id_t)taper, EV_READFD,
717 handle_taper_result, NULL);
722 qname = quote_string(dp->name);
723 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
724 sched(dp)->datestamp);
725 fprintf(stderr,"driver: startaflush: %s %s %s "
726 OFF_T_FMT " " OFF_T_FMT "\n",
727 taperalgo2str(conf_taperalgo), dp->host->hostname, qname,
728 (OFF_T_FMT_TYPE)sched(taper_disk)->act_size,
729 (OFF_T_FMT_TYPE)tape_left);
730 if(sched(dp)->act_size <= tape_left)
731 tape_left -= sched(dp)->act_size;
733 tape_left = (off_t)0;
736 error("FATAL: Taper marked busy and no work found.");
739 } else if(!taper_busy && taper_ev_read != NULL) {
740 event_release(taper_ev_read);
741 taper_ev_read = NULL;
752 /* first, check if host is too busy */
754 if(dp->host->inprogress >= dp->host->maxdumps) {
758 /* next, check conflict with other dumps on same spindle */
760 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
764 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
765 if(dp2->inprogress && dp2->spindle == dp->spindle) {
777 disk_t *diskp, *delayed_diskp, *diskp_accept;
778 assignedhd_t **holdp=NULL, **holdp_accept;
779 const time_t now = time(NULL);
782 char *result_argv[MAX_ARGS+1];
788 idle_reason = IDLE_NO_DUMPERS;
791 if(dumpers_ev_time != NULL) {
792 event_release(dumpers_ev_time);
793 dumpers_ev_time = NULL;
796 for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
802 if (dumper->ev_read != NULL) {
803 event_release(dumper->ev_read);
804 dumper->ev_read = NULL;
808 * A potential problem with starting from the bottom of the dump time
809 * distribution is that a slave host will have both one of the shortest
810 * and one of the longest disks, so starting its shortest disk first will
811 * tie up the host and eliminate its longest disk from consideration the
812 * first pass through. This could cause a big delay in starting that long
813 * disk, which could drag out the whole night's dumps.
815 * While starting from the top of the dump time distribution solves the
816 * above problem, this turns out to be a bad idea, because the big dumps
817 * will almost certainly pack the holding disk completely, leaving no
818 * room for even one small dump to start. This ends up shutting out the
819 * small-end dumpers completely (they stay idle).
821 * The introduction of multiple simultaneous dumps to one host alleviates
822 * the biggest&smallest dumps problem: both can be started at the
828 delayed_diskp = NULL;
832 dumporder = getconf_str(CNF_DUMPORDER);
833 if(strlen(dumporder) > (size_t)(dumper-dmptable)) {
834 dumptype = dumporder[dumper-dmptable];
837 if(dumper-dmptable < 3)
843 for(diskp = rq->head; diskp != NULL; diskp = diskp->next) {
844 assert(diskp->host != NULL && sched(diskp) != NULL);
846 if (diskp->host->start_t > now) {
847 cur_idle = max(cur_idle, IDLE_START_WAIT);
848 if (delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
849 delayed_diskp = diskp;
850 sleep_time = diskp->host->start_t;
852 } else if(diskp->start_t > now) {
853 cur_idle = max(cur_idle, IDLE_START_WAIT);
854 if (delayed_diskp == NULL || sleep_time > diskp->start_t) {
855 delayed_diskp = diskp;
856 sleep_time = diskp->start_t;
858 } else if (diskp->host->netif->curusage > 0 &&
859 sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
860 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
861 } else if(sched(diskp)->no_space) {
862 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
863 } else if (diskp->to_holdingdisk == HOLD_NEVER) {
864 cur_idle = max(cur_idle, IDLE_NO_HOLD);
866 find_diskspace(sched(diskp)->est_size, &cur_idle, NULL)) == NULL) {
867 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
868 } else if (client_constrained(diskp)) {
869 free_assignedhd(holdp);
870 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
873 /* disk fits, dump it */
874 int accept = !diskp_accept;
877 case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
879 case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
881 case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
883 case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
885 case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
887 case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
889 default: log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
891 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
896 if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
897 if(holdp_accept) free_assignedhd(holdp_accept);
898 diskp_accept = diskp;
899 holdp_accept = holdp;
902 free_assignedhd(holdp);
906 free_assignedhd(holdp);
911 diskp = diskp_accept;
912 holdp = holdp_accept;
914 idle_reason = max(idle_reason, cur_idle);
917 * If we have no disk at this point, and there are disks that
918 * are delayed, then schedule a time event to call this dumper
919 * with the disk with the shortest delay.
921 if (diskp == NULL && delayed_diskp != NULL) {
922 assert(sleep_time > now);
924 dumpers_ev_time = event_register((event_id_t)sleep_time, EV_TIME,
925 handle_dumpers_time, &runq);
927 } else if (diskp != NULL) {
928 sched(diskp)->act_size = (off_t)0;
929 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
930 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
932 sched(diskp)->destname = newstralloc(sched(diskp)->destname,
933 sched(diskp)->holdp[0]->destname);
934 diskp->host->inprogress++; /* host is now busy */
935 diskp->inprogress = 1;
936 sched(diskp)->dumper = dumper;
937 sched(diskp)->timestamp = now;
939 dumper->busy = 1; /* dumper is now busy */
940 dumper->dp = diskp; /* link disk to dumper */
941 remove_disk(rq, diskp); /* take it off the run queue */
943 sched(diskp)->origsize = (off_t)-1;
944 sched(diskp)->dumpsize = (off_t)-1;
945 sched(diskp)->dumptime = (time_t)0;
946 sched(diskp)->tapetime = (time_t)0;
947 chunker = dumper->chunker;
948 chunker->result = LAST_TOK;
949 dumper->result = LAST_TOK;
950 startup_chunk_process(chunker,chunker_program);
951 chunker_cmd(chunker, START, (void *)driver_timestamp);
952 chunker->dumper = dumper;
953 chunker_cmd(chunker, PORT_WRITE, diskp);
954 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
956 assignedhd_t **h=NULL;
959 printf("driver: did not get PORT from %s for %s:%s\n",
960 chunker->name, diskp->host->hostname, diskp->name);
963 deallocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
964 h = sched(diskp)->holdp;
965 activehd = sched(diskp)->activehd;
966 h[activehd]->used = 0;
967 holdalloc(h[activehd]->disk)->allocated_dumpers--;
968 adjust_diskspace(diskp, DONE);
969 delete_diskspace(diskp);
970 diskp->host->inprogress--;
971 diskp->inprogress = 0;
972 sched(diskp)->dumper = NULL;
975 sched(diskp)->attempted++;
976 free_serial_dp(diskp);
977 if(sched(diskp)->attempted < 2)
978 enqueue_disk(rq, diskp);
981 dumper->ev_read = event_register((event_id_t)dumper->fd, EV_READFD,
982 handle_dumper_result, dumper);
983 chunker->ev_read = event_register((event_id_t)chunker->fd, EV_READFD,
984 handle_chunker_result, chunker);
985 dumper->output_port = atoi(result_argv[2]);
987 dumper_cmd(dumper, PORT_DUMP, diskp);
989 diskp->host->start_t = now + 15;
995 * This gets called when a dumper is delayed for some reason. It may
996 * be because a disk has a delayed start, or amanda is constrained
997 * by network or disk limits.
1001 handle_dumpers_time(
1004 disklist_t *runq = cookie;
1005 event_release(dumpers_ev_time);
1006 dumpers_ev_time = NULL;
1007 start_some_dumps(runq);
1018 printf("dump of driver schedule %s:\n--------\n", str);
1020 for(dp = qp->head; dp != NULL; dp = dp->next) {
1021 qname = quote_string(dp->name);
1022 printf(" %-20s %-25s lv %d t %5lu s " OFF_T_FMT " p %d\n",
1023 dp->host->hostname, qname, sched(dp)->level,
1024 sched(dp)->est_time,
1025 (OFF_T_FMT_TYPE)sched(dp)->est_size, sched(dp)->priority);
1028 printf("--------\n");
1032 start_degraded_mode(
1033 /*@keep@*/ disklist_t *queuep)
1037 off_t est_full_size;
1040 if (taper_ev_read != NULL) {
1041 event_release(taper_ev_read);
1042 taper_ev_read = NULL;
1045 newq.head = newq.tail = 0;
1047 dump_schedule(queuep, "before start degraded mode");
1049 est_full_size = (off_t)0;
1050 while(!empty(*queuep)) {
1051 dp = dequeue_disk(queuep);
1053 qname = quote_string(dp->name);
1054 if(sched(dp)->level != 0)
1055 /* go ahead and do the disk as-is */
1056 enqueue_disk(&newq, dp);
1058 if (reserved_space + est_full_size + sched(dp)->est_size
1059 <= total_disksize) {
1060 enqueue_disk(&newq, dp);
1061 est_full_size += sched(dp)->est_size;
1063 else if(sched(dp)->degr_level != -1) {
1064 sched(dp)->level = sched(dp)->degr_level;
1065 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
1066 sched(dp)->est_nsize = sched(dp)->degr_nsize;
1067 sched(dp)->est_csize = sched(dp)->degr_csize;
1068 sched(dp)->est_time = sched(dp)->degr_time;
1069 sched(dp)->est_kps = sched(dp)->degr_kps;
1070 enqueue_disk(&newq, dp);
1073 log_add(L_FAIL,"%s %s %s %d [can't switch to incremental dump]",
1074 dp->host->hostname, qname, sched(dp)->datestamp,
1081 /*@i@*/ *queuep = newq;
1084 dump_schedule(queuep, "after start degraded mode");
1089 continue_port_dumps(void)
1093 int active_dumpers=0, busy_dumpers=0, i;
1096 /* First we try to grant diskspace to some dumps waiting for it. */
1097 for( dp = roomq.head; dp; dp = ndp ) {
1099 /* find last holdingdisk used by this dump */
1100 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ) {
1101 (void)h; /* Quiet lint */
1103 /* find more space */
1104 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1105 &active_dumpers, h[i] );
1107 for(dumper = dmptable; dumper < dmptable + inparallel &&
1108 dumper->dp != dp; dumper++) {
1109 (void)dp; /* Quiet lint */
1111 assert( dumper < dmptable + inparallel );
1112 sched(dp)->activehd = assign_holdingdisk( h, dp );
1113 chunker_cmd( dumper->chunker, CONTINUE, dp );
1115 remove_disk( &roomq, dp );
1119 /* So for some disks there is less holding diskspace available than
1120 * was asked for. Possible reasons are
1121 * a) diskspace has been allocated for other dumps which are
1122 * still running or already being written to tape
1123 * b) all other dumps have been suspended due to lack of diskspace
1124 * c) this dump doesn't fit on all the holding disks
1125 * Case a) is not a problem. We just wait for the diskspace to
1126 * be freed by moving the current disk to a queue.
1127 * If case b) occurs, we have a deadlock situation. We select
1128 * a dump from the queue to be aborted and abort it. It will
1129 * be retried later dumping to disk.
1130 * If case c) is detected, the dump is aborted. Next time
1131 * it will be dumped directly to tape. Actually, case c is a special
1132 * manifestation of case b) where only one dumper is busy.
1134 for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1135 if( dumper->busy ) {
1137 if( !find_disk(&roomq, dumper->dp) ) {
1140 sched(dp)->est_size > sched(dumper->dp)->est_size ) {
1145 if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) &&
1146 ((!taper_busy && empty(tapeq)) || degraded_mode) &&
1147 pending_aborts == 0 ) { /* not case a */
1148 if( busy_dumpers == 1 ) { /* case c */
1149 sched(dp)->no_space = 1;
1152 /* At this time, dp points to the dump with the smallest est_size.
1153 * We abort that dump, hopefully not wasting too much time retrying it.
1155 remove_disk( &roomq, dp );
1156 chunker_cmd( sched(dp)->dumper->chunker, ABORT, NULL);
1157 dumper_cmd( sched(dp)->dumper, ABORT, NULL );
1164 handle_taper_result(
1171 char *result_argv[MAX_ARGS+1];
1172 int avail_tapes = 0;
1174 (void)cookie; /* Quiet unused parameter warning */
1176 assert(cookie == NULL);
1182 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
1187 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
1188 if(result_argc != 5) {
1189 error("error: [taper DONE result_argc != 5: %d", result_argc);
1193 dp = serial2disk(result_argv[2]);
1194 free_serial(result_argv[2]);
1196 filenum = OFF_T_ATOI(result_argv[4]);
1198 update_info_taper(dp, result_argv[3], filenum,
1202 delete_diskspace(dp);
1204 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1205 walltime_str(curclock()), dp->host->hostname, dp->name);
1208 amfree(sched(dp)->destname);
1209 amfree(sched(dp)->dumpdate);
1210 amfree(sched(dp)->degr_dumpdate);
1211 amfree(sched(dp)->datestamp);
1218 /* continue with those dumps waiting for diskspace */
1219 continue_port_dumps();
1222 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
1223 if (result_argc < 2) {
1224 error("error [taper TRYAGAIN result_argc < 2: %d]",
1228 dp = serial2disk(result_argv[2]);
1229 free_serial(result_argv[2]);
1230 printf("driver: taper-tryagain time %s disk %s:%s\n",
1231 walltime_str(curclock()), dp->host->hostname, dp->name);
1234 /* See how many tapes we have left, but we always
1235 retry once (why?) */
1237 if(dp->tape_splitsize > (off_t)0)
1238 avail_tapes = conf_runtapes - current_tape;
1242 if(sched(dp)->attempted > avail_tapes) {
1243 log_add(L_FAIL, "%s %s %s %d [too many taper retries]",
1244 dp->host->hostname, dp->name, sched(dp)->datestamp,
1246 printf("driver: taper failed %s %s %s, too many taper retry\n",
1247 result_argv[2], dp->host->hostname, dp->name);
1250 /* Re-insert into taper queue. */
1251 sched(dp)->attempted++;
1252 headqueue_disk(&tapeq, dp);
1255 tape_left = tape_length;
1257 /* run next thing from queue */
1262 continue_port_dumps();
1265 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
1266 if (result_argc != 3) {
1267 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]",
1273 case SPLIT_NEEDNEXT: /* SPLIT-NEEDNEXT <handle> <kb written> */
1274 if (result_argc != 3) {
1275 error("error [taper SPLIT_NEEDNEXT result_argc != 3: %d]",
1280 /* Update our tape counter and reset tape_left */
1282 tape_left = tape_length;
1284 /* Reduce the size of the dump by amount written and reduce
1285 tape_left by the amount left over */
1286 dp = serial2disk(result_argv[2]);
1287 sched(dp)->act_size -= OFF_T_ATOI(result_argv[3]);
1288 if (sched(dp)->act_size < tape_left)
1289 tape_left -= sched(dp)->act_size;
1295 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1296 dp = serial2disk(result_argv[2]);
1297 free_serial(result_argv[2]);
1298 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1299 walltime_str(curclock()), dp->host->hostname, dp->name);
1301 log_add(L_WARNING, "Taper error: %s", result_argv[3]);
1306 log_add(L_WARNING, "Taper protocol error");
1309 * Since we received a taper error, we can't send anything more
1310 * to the taper. Go into degraded mode to try to get everything
1311 * onto disk. Later, these dumps can be flushed to a new tape.
1312 * The tape queue is zapped so that it appears empty in future
1313 * checks. If there are dumps waiting for diskspace to be freed,
1318 "going into degraded mode because of taper component error.");
1319 start_degraded_mode(&runq);
1321 tapeq.head = tapeq.tail = NULL;
1324 if(taper_ev_read != NULL) {
1325 event_release(taper_ev_read);
1326 taper_ev_read = NULL;
1328 if(cmd != TAPE_ERROR) aclose(taper);
1329 continue_port_dumps();
1333 error("driver received unexpected token (%s) from taper",
1338 * Wakeup any dumpers that are sleeping because of network
1339 * or disk constraints.
1341 start_some_dumps(&runq);
1343 } while(areads_dataready(taper));
/*
 * Scan the dumper table dmptable[0 .. inparallel-1] and return the first
 * entry that is neither busy nor marked down.
 * NOTE(review): the function header and the fall-through return are not
 * visible in this listing; presumably NULL is returned when every dumper
 * is busy or down -- confirm against the full source.
 */
1351 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1352 if(!dumper->busy && !dumper->down) return dumper;
/*
 * Finish a dump-to-holding-disk once both the dumper and its chunker have
 * reported a result for disk dp:
 *   - on full success, record the dump in the info database and log the
 *     estimate statistics;
 *   - release the network bandwidth reserved for the dump;
 *   - rename the temporary holding file (complete or partial);
 *   - account the space really used on the active holding disk;
 *   - requeue the dump (first failure), queue it for taping, or discard it;
 *   - reap the chunker process and restart waiting dumps.
 * NOTE(review): the function header is not visible in this listing, so the
 * exact name and signature should be confirmed against the full source.
 */
1363 assignedhd_t **h=NULL;
1369 dumper = sched(dp)->dumper;
1370 chunker = dumper->chunker;
1374 h = sched(dp)->holdp;
1375 activehd = sched(dp)->activehd;
/* Only a dump that both processes reported as DONE updates the database. */
1377 if(dumper->result == DONE && chunker->result == DONE) {
1378 update_info_dumper(dp, sched(dp)->origsize,
1379 sched(dp)->dumpsize, sched(dp)->dumptime);
1380 log_add(L_STATS, "estimate %s %s %s %d [sec %ld nkb " OFF_T_FMT
1381 " ckb " OFF_T_FMT " kps %d]",
1382 dp->host->hostname, dp->name, sched(dp)->datestamp,
1384 sched(dp)->est_time, (OFF_T_FMT_TYPE)sched(dp)->est_nsize,
1385 (OFF_T_FMT_TYPE)sched(dp)->est_csize,
1386 sched(dp)->est_kps);
1389 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
/* The image is partial unless dumper and chunker both finished with DONE. */
1391 is_partial = dumper->result != DONE || chunker->result != DONE;
1392 rename_tmp_holding(sched(dp)->destname, !is_partial);
/* Sum the space used on the holding disks that precede the active one. */
1395 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1396 dummy += h[i]->used;
/* The rest of the total file size is charged to the active holding disk. */
1399 size = size_holding_files(sched(dp)->destname, 0);
1400 h[activehd]->used = size - dummy;
1401 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1402 adjust_diskspace(dp, DONE);
1404 sched(dp)->attempted += 1;
/*
 * Failure on the first attempt: drop the holding files and put the disk
 * back on the run queue.  Otherwise the image goes to the tape queue when
 * it is larger than DISK_BLOCK_KB.
 */
1406 if((dumper->result != DONE || chunker->result != DONE) &&
1407 sched(dp)->attempted <= 1) {
1408 delete_diskspace(dp);
1409 enqueue_disk(&runq, dp);
1411 else if(size > (off_t)DISK_BLOCK_KB) {
1412 sched(dp)->attempted = 0;
1413 enqueue_disk(&tapeq, dp);
1417 delete_diskspace(dp);
1421 dp->host->inprogress -= 1;
/* Reap the chunker child and close its command pipe. */
1424 waitpid(chunker->pid, NULL, 0 );
1425 aclose(chunker->fd);
1430 if (chunker->result == ABORT_FINISHED)
1432 continue_port_dumps();
1434 * Wakeup any dumpers that are sleeping because of network
1435 * or disk constraints.
1437 start_some_dumps(&runq);
1442 handle_dumper_result(
1445 /*static int pending_aborts = 0;*/
1446 dumper_t *dumper = cookie;
1451 char *result_argv[MAX_ARGS+1];
1453 assert(dumper != NULL);
1455 assert(dp != NULL && sched(dp) != NULL);
1461 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1464 /* result_argv[2] always contains the serial number */
1465 sdp = serial2disk(result_argv[2]);
1467 error("%s: Invalid serial number", get_pname(), result_argv[2]);
1472 qname = quote_string(dp->name);
1475 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
1476 if(result_argc != 6) {
1477 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1481 /*free_serial(result_argv[2]);*/
1483 sched(dp)->origsize = OFF_T_ATOI(result_argv[3]);
1484 sched(dp)->dumptime = TIME_T_ATOI(result_argv[5]);
1486 printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1487 walltime_str(curclock()), dumper->name,
1488 dp->host->hostname, qname);
1491 dumper->result = cmd;
1495 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1497 * Requeue this disk, and fall through to the FAILED
1500 if(sched(dp)->attempted) {
1501 log_add(L_FAIL, "%s %s %s %d [too many dumper retry: %s]",
1502 dp->host->hostname, dp->name, sched(dp)->datestamp,
1503 sched(dp)->level, result_argv[3]);
1504 printf("driver: dump failed %s %s %s, too many dumper retry: %s\n",
1505 result_argv[2], dp->host->hostname, dp->name,
1509 case FAILED: /* FAILED <handle> <errstr> */
1510 /*free_serial(result_argv[2]);*/
1511 dumper->result = cmd;
1514 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1516 * We sent an ABORT from the NO-ROOM case because this dump
1517 * wasn't going to fit onto the holding disk. We now need to
1518 * clean up the remains of this image, and try to finish
1519 * other dumps that are waiting on disk space.
1521 assert(pending_aborts);
1522 /*free_serial(result_argv[2]);*/
1523 dumper->result = cmd;
1527 /* either EOF or garbage from dumper. Turn it off */
1528 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1529 dumper->name, (long)dumper->pid);
1530 if (dumper->ev_read) {
1531 event_release(dumper->ev_read);
1532 dumper->ev_read = NULL;
1536 dumper->down = 1; /* mark it down so it isn't used again */
1538 /* if it was dumping something, zap it and try again */
1539 if(sched(dp)->attempted) {
1540 log_add(L_FAIL, "%s %s %s %d [%s died]",
1541 dp->host->hostname, qname, sched(dp)->datestamp,
1542 sched(dp)->level, dumper->name);
1545 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1546 dumper->name, dp->host->hostname, qname,
1550 dumper->result = cmd;
1558 /* send the dumper result to the chunker */
1559 if(dumper->chunker->down == 0 && dumper->chunker->fd != -1 &&
1560 dumper->chunker->result == LAST_TOK) {
1562 chunker_cmd(dumper->chunker, DONE, dp);
1565 chunker_cmd(dumper->chunker, FAILED, dp);
1569 if(dumper->result != LAST_TOK && dumper->chunker->result != LAST_TOK)
1572 } while(areads_dataready(dumper->fd));
/*
 * Process result lines arriving from a chunker.
 *
 * cookie is the chunker_t whose command pipe became readable.  Results are
 * read with getresult() for as long as areads_dataready() reports more
 * buffered input.  PARTIAL/DONE record the dump size; TRY-AGAIN, FAILED,
 * ABORT-FINISHED and EOF/garbage release the read event and store the
 * token in chunker->result.  NO-ROOM gives back the space the chunker
 * could not use; RQ-MORE-DISK moves on to the next assigned holding disk
 * or asks find_diskspace() for more, parking the disk on roomq when none
 * is available.
 *
 * Fix: the "Invalid serial number" message passed the offending serial to
 * error() but the format string had no conversion for it, so the value was
 * silently dropped.  The format now prints it.
 */
1577 handle_chunker_result(
1580 /*static int pending_aborts = 0;*/
1581 chunker_t *chunker = cookie;
1582 assignedhd_t **h=NULL;
1587 char *result_argv[MAX_ARGS+1];
1592 assert(chunker != NULL);
1593 dumper = chunker->dumper;
1594 assert(dumper != NULL);
1597 assert(sched(dp) != NULL);
1598 assert(sched(dp)->destname != NULL);
1599 assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
1601 if(dp && sched(dp) && sched(dp)->holdp) {
1602 h = sched(dp)->holdp;
1603 activehd = sched(dp)->activehd;
1610 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1613 /* result_argv[2] always contains the serial number */
1614 sdp = serial2disk(result_argv[2]);
/* report which serial was rejected, not just that one was */
1616 error("%s: Invalid serial number %s", get_pname(), result_argv[2]);
1623 case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
1624 case DONE: /* DONE <handle> <dumpsize> <errstr> */
1625 if(result_argc != 4) {
1626 error("error [chunker %s result_argc != 4: %d]", cmdstr[cmd],
1630 /*free_serial(result_argv[2]);*/
1632 sched(dp)->dumpsize = (off_t)atof(result_argv[3]);
1634 qname = quote_string(dp->name);
1635 printf("driver: finished-cmd time %s %s chunked %s:%s\n",
1636 walltime_str(curclock()), chunker->name,
1637 dp->host->hostname, qname);
1641 event_release(chunker->ev_read);
1643 chunker->result = cmd;
1647 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1648 event_release(chunker->ev_read);
1650 chunker->result = cmd;
1653 case FAILED: /* FAILED <handle> <errstr> */
1654 /*free_serial(result_argv[2]);*/
1656 event_release(chunker->ev_read);
1658 chunker->result = cmd;
1662 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1663 if (!h || activehd < 0) { /* should never happen */
1664 error("!h || activehd < 0");
/* shrink every counter for the active holding disk by the missing size */
1667 h[activehd]->used -= OFF_T_ATOI(result_argv[3]);
1668 h[activehd]->reserved -= OFF_T_ATOI(result_argv[3]);
1669 holdalloc(h[activehd]->disk)->allocated_space -= OFF_T_ATOI(result_argv[3]);
1670 h[activehd]->disk->disksize -= OFF_T_ATOI(result_argv[3]);
1673 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1674 if (!h || activehd < 0) { /* should never happen */
1675 error("!h || activehd < 0");
/* the current holding disk is exhausted: everything reserved was used */
1678 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1679 h[activehd]->used = h[activehd]->reserved;
1680 if( h[++activehd] ) { /* There's still some allocated space left.
1681 * Tell the dumper about it. */
1682 sched(dp)->activehd++;
1683 chunker_cmd( chunker, CONTINUE, dp );
1684 } else { /* !h[++activehd] - must allocate more space */
1685 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1686 sched(dp)->est_size = (sched(dp)->act_size/(off_t)20) * (off_t)21; /* +5% */
1687 sched(dp)->est_size = am_round(sched(dp)->est_size, (off_t)DISK_BLOCK_KB);
/* always ask for at least two more blocks */
1688 if (sched(dp)->est_size < sched(dp)->act_size + 2*DISK_BLOCK_KB)
1689 sched(dp)->est_size += 2 * DISK_BLOCK_KB;
1690 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1694 /* No diskspace available. The reason for this will be
1695 * determined in continue_port_dumps(). */
1696 enqueue_disk( &roomq, dp );
1697 continue_port_dumps();
1699 /* OK, allocate space for disk and have chunker continue */
1700 sched(dp)->activehd = assign_holdingdisk( h, dp );
1701 chunker_cmd( chunker, CONTINUE, dp );
1707 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1709 * We sent an ABORT from the NO-ROOM case because this dump
1710 * wasn't going to fit onto the holding disk. We now need to
1711 * clean up the remains of this image, and try to finish
1712 * other dumps that are waiting on disk space.
1714 /*assert(pending_aborts);*/
1716 /*free_serial(result_argv[2]);*/
1718 event_release(chunker->ev_read);
1720 chunker->result = cmd;
1725 /* either EOF or garbage from chunker. Turn it off */
1726 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1727 chunker->name, (long)chunker->pid);
1730 /* if it was dumping something, zap it and try again */
1731 if (!h || activehd < 0) { /* should never happen */
1732 error("!h || activehd < 0");
1735 qname = quote_string(dp->name);
1736 if(sched(dp)->attempted) {
1737 log_add(L_FAIL, "%s %s %s %d [%s died]",
1738 dp->host->hostname, qname, sched(dp)->datestamp,
1739 sched(dp)->level, chunker->name);
1742 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1743 chunker->name, dp->host->hostname, qname,
1750 event_release(chunker->ev_read);
1752 chunker->result = cmd;
/* both halves have reported: the dump can be finished off */
1760 if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
1763 } while(areads_dataready(chunker->fd));
/*
 * Read the list of holding-disk files to flush from stdin.
 *
 * Each input line is expected to be
 *     FLUSH <hostname> <quoted diskname> <datestamp> <level> <filename>
 * and the list ends at an ENDFLUSH line.  A malformed line is reported
 * through error().  For every file whose header is a dump file
 * consistent with the line, whose disk is known to lookup_disk() and
 * whose level is in 0..9, a new disk_t plus sched_t is built and appended
 * to the local queue tq.
 * NOTE(review): the function header and return statement are not visible
 * in this listing; presumably tq is returned -- confirm.
 */
1774 char *hostname, *diskname, *datestamp;
1778 char *inpline = NULL;
1785 tq.head = tq.tail = NULL;
1787 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1789 if (inpline[0] == '\0')
1795 skip_whitespace(s, ch); /* find the command */
1797 error("flush line %d: syntax error (no command)", line);
1801 skip_non_whitespace(s, ch);
1804 if(strcmp(command,"ENDFLUSH") == 0) {
1808 if(strcmp(command,"FLUSH") != 0) {
1809 error("flush line %d: syntax error (%s != FLUSH)", line, command);
1813 skip_whitespace(s, ch); /* find the hostname */
1815 error("flush line %d: syntax error (no hostname)", line);
1819 skip_non_whitespace(s, ch);
1822 skip_whitespace(s, ch); /* find the diskname */
1824 error("flush line %d: syntax error (no diskname)", line);
1828 skip_quoted_string(s, ch);
1829 s[-1] = '\0'; /* terminate the disk name */
1830 diskname = unquote_string(qname);
1832 skip_whitespace(s, ch); /* find the datestamp */
1834 error("flush line %d: syntax error (no datestamp)", line);
1838 skip_non_whitespace(s, ch);
1841 skip_whitespace(s, ch); /* find the level number */
1842 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1843 error("flush line %d: syntax error (bad level)", line);
1846 skip_integer(s, ch);
1848 skip_whitespace(s, ch); /* find the filename */
1850 error("flush line %d: syntax error (no filename)", line);
1854 skip_non_whitespace(s, ch);
/* Read the header of the named holding file into file. */
1857 get_dumpfile(destname, &file);
/* Continuation chunks are passed over silently; other non-dump files are logged. */
1858 if( file.type != F_DUMPFILE) {
1859 if( file.type != F_CONT_DUMPFILE )
1860 log_add(L_INFO, "%s: ignoring cruft file.", destname);
/* The header must agree with the host, disk and datestamp on the line. */
1865 if(strcmp(hostname, file.name) != 0 ||
1866 strcmp(diskname, file.disk) != 0 ||
1867 strcmp(datestamp, file.datestamp) != 0) {
1868 log_add(L_INFO, "disk %s:%s not consistent with file %s",
1869 hostname, diskname, destname);
1875 dp = lookup_disk(file.name, file.disk);
1878 log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1879 destname, file.name, file.disk);
1883 if(file.dumplevel < 0 || file.dumplevel > 9) {
1884 log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1885 destname, file.dumplevel);
/* The flush entry gets its own disk_t, detached from any queue. */
1889 dp1 = (disk_t *)alloc(SIZEOF(disk_t));
1891 dp1->next = dp1->prev = NULL;
1893 /* add it to the flushhost list */
1895 flushhost = alloc(SIZEOF(am_host_t));
1896 flushhost->next = NULL;
1897 flushhost->hostname = stralloc("FLUSHHOST");
1898 flushhost->up = NULL;
1899 flushhost->features = NULL;
1901 dp1->hostnext = flushhost->disks;
1902 flushhost->disks = dp1;
/* Schedule record for the flush: no estimates, no degraded-mode level. */
1904 sp = (sched_t *) alloc(SIZEOF(sched_t));
1905 sp->destname = stralloc(destname);
1906 sp->level = file.dumplevel;
1907 sp->dumpdate = NULL;
1908 sp->degr_dumpdate = NULL;
1909 sp->datestamp = stralloc(file.datestamp);
1910 sp->est_nsize = (off_t)0;
1911 sp->est_csize = (off_t)0;
1915 sp->degr_level = -1;
1917 sp->act_size = size_holding_files(destname, 0);
1918 sp->holdp = build_diskspace(destname);
/*
 * NOTE(review): on this early continue, sp and the strings allocated
 * above do not appear to be released, and dp1 is already linked into
 * flushhost->disks -- confirm whether this path can leak.
 */
1919 if(sp->holdp == NULL) continue;
1921 sp->timestamp = (time_t)0;
1923 dp1->up = (char *)sp;
1925 enqueue_disk(&tq, dp1);
/*
 * Read the dump schedule from stdin and queue the disks for dumping.
 *
 * Each input line is expected to be
 *     DUMP <host> <features> <quoted disk> <datestamp> <priority> <level>
 *          <dumpdate> <nsize> <csize> <time> <kps>
 * optionally followed by the degraded-mode fields
 *          <degr level> <degr dumpdate> <degr nsize> <degr csize>
 *          <degr time> <degr kps>
 * A malformed line is reported through error(); a disk that
 * lookup_disk() does not know is reported and ignored.  For every
 * accepted line a sched_t is filled in, attached to dp->up, and the disk
 * is moved from waitq to runq.  Afterwards the flush size is printed and
 * dumping is started.
 *
 * Fix: in this copy of the file the address-of expressions for the
 * degraded-mode variables had been mangled into a degree sign followed by
 * "r_..." (an HTML entity decoding accident).  They are restored to
 * &degr_time, &degr_level, &degr_nsize, &degr_csize and &degr_kps.
 */
1938 int level, line, priority;
1939 char *dumpdate, *degr_dumpdate;
1941 time_t time, degr_time;
1942 time_t *time_p = &time;
1943 time_t *degr_time_p = &degr_time;
1944 off_t nsize, csize, degr_nsize, degr_csize;
1945 unsigned long kps, degr_kps;
1946 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1950 off_t flush_size = (off_t)0;
1953 (void)cookie; /* Quiet unused parameter warning */
1955 event_release(schedule_ev_read);
1957 /* read schedule from stdin */
1959 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1960 if (inpline[0] == '\0')
1967 skip_whitespace(s, ch); /* find the command */
1969 error("schedule line %d: syntax error (no command)", line);
1973 skip_non_whitespace(s, ch);
1976 if(strcmp(command,"DUMP") != 0) {
1977 error("schedule line %d: syntax error (%s != DUMP)", line, command);
1981 skip_whitespace(s, ch); /* find the host name */
1983 error("schedule line %d: syntax error (no host name)", line);
1987 skip_non_whitespace(s, ch);
1990 skip_whitespace(s, ch); /* find the feature list */
1992 error("schedule line %d: syntax error (no feature list)", line);
1996 skip_non_whitespace(s, ch);
1999 skip_whitespace(s, ch); /* find the disk name */
2001 error("schedule line %d: syntax error (no disk name)", line);
2005 skip_quoted_string(s, ch);
2006 s[-1] = '\0'; /* terminate the disk name */
2007 diskname = unquote_string(qname);
2009 skip_whitespace(s, ch); /* find the datestamp */
2011 error("schedule line %d: syntax error (no datestamp)", line);
2015 skip_non_whitespace(s, ch);
2018 skip_whitespace(s, ch); /* find the priority number */
2019 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
2020 error("schedule line %d: syntax error (bad priority)", line);
2023 skip_integer(s, ch);
2025 skip_whitespace(s, ch); /* find the level number */
2026 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2027 error("schedule line %d: syntax error (bad level)", line);
2030 skip_integer(s, ch);
2032 skip_whitespace(s, ch); /* find the dump date */
2034 error("schedule line %d: syntax error (bad dump date)", line);
2038 skip_non_whitespace(s, ch);
2041 skip_whitespace(s, ch); /* find the native size */
2042 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT,
2043 (OFF_T_FMT_TYPE *)&nsize) != 1) {
2044 error("schedule line %d: syntax error (bad nsize)", line);
2047 skip_integer(s, ch);
2049 skip_whitespace(s, ch); /* find the compressed size */
2050 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT,
2051 (OFF_T_FMT_TYPE *)&csize) != 1) {
2052 error("schedule line %d: syntax error (bad csize)", line);
2055 skip_integer(s, ch);
2057 skip_whitespace(s, ch); /* find the time number */
2058 if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2059 (TIME_T_FMT_TYPE *)time_p) != 1) {
2060 error("schedule line %d: syntax error (bad estimated time)", line);
2063 skip_integer(s, ch);
2065 skip_whitespace(s, ch); /* find the kps number */
2066 if(ch == '\0' || sscanf(s - 1, "%lu", &kps) != 1) {
2067 error("schedule line %d: syntax error (bad kps)", line);
2070 skip_integer(s, ch);
2072 degr_dumpdate = NULL; /* flag if degr fields found */
2073 skip_whitespace(s, ch); /* find the degr level number */
2075 if(sscanf(s - 1, "%d", &degr_level) != 1) {
2076 error("schedule line %d: syntax error (bad degr level)", line);
2079 skip_integer(s, ch);
2081 skip_whitespace(s, ch); /* find the degr dump date */
2083 error("schedule line %d: syntax error (bad degr dump date)", line);
2086 degr_dumpdate = s - 1;
2087 skip_non_whitespace(s, ch);
2090 skip_whitespace(s, ch); /* find the degr native size */
2091 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT,
2092 (OFF_T_FMT_TYPE *)&degr_nsize) != 1) {
2093 error("schedule line %d: syntax error (bad degr nsize)", line);
2096 skip_integer(s, ch);
2098 skip_whitespace(s, ch); /* find the degr compressed size */
2099 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT,
2100 (OFF_T_FMT_TYPE *)&degr_csize) != 1) {
2101 error("schedule line %d: syntax error (bad degr csize)", line);
2104 skip_integer(s, ch);
2106 skip_whitespace(s, ch); /* find the degr time number */
2107 if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2108 (TIME_T_FMT_TYPE *)degr_time_p) != 1) {
2109 error("schedule line %d: syntax error (bad degr estimated time)", line);
2112 skip_integer(s, ch);
2114 skip_whitespace(s, ch); /* find the degr kps number */
2115 if(ch == '\0' || sscanf(s - 1, "%lu", &degr_kps) != 1) {
2116 error("schedule line %d: syntax error (bad degr kps)", line);
2119 skip_integer(s, ch);
2122 dp = lookup_disk(hostname, diskname);
2125 "schedule line %d: %s:'%s' not in disklist, ignored",
2126 line, hostname, qname);
2131 sp = (sched_t *) alloc(SIZEOF(sched_t));
2134 sp->dumpdate = stralloc(dumpdate);
2135 sp->est_nsize = DISK_BLOCK_KB + nsize; /* include header */
2136 sp->est_csize = DISK_BLOCK_KB + csize; /* include header */
2137 /* round estimate to next multiple of DISK_BLOCK_KB */
2138 sp->est_csize = am_round(sp->est_csize, DISK_BLOCK_KB);
2139 sp->est_size = sp->est_csize;
2140 sp->est_time = time;
2142 sp->priority = priority;
2143 sp->datestamp = stralloc(datestamp);
2146 sp->degr_level = degr_level;
2147 sp->degr_dumpdate = stralloc(degr_dumpdate);
2148 sp->degr_nsize = DISK_BLOCK_KB + degr_nsize;
2149 sp->degr_csize = DISK_BLOCK_KB + degr_csize;
2150 /* round estimate to next multiple of DISK_BLOCK_KB */
2151 sp->degr_csize = am_round(sp->degr_csize, DISK_BLOCK_KB);
2152 sp->degr_time = degr_time;
2153 sp->degr_kps = degr_kps;
2155 sp->degr_level = -1;
2156 sp->degr_dumpdate = NULL;
2161 sp->act_size = (off_t)0;
2165 sp->timestamp = (time_t)0;
2166 sp->destname = NULL;
2169 dp->up = (char *) sp;
/* the first schedule line seen for a host fixes its feature set */
2170 if(dp->host->features == NULL) {
2171 dp->host->features = am_string_to_feature(features);
/* the disk leaves the wait queue and becomes runnable */
2173 remove_disk(&waitq, dp);
2174 enqueue_disk(&runq, dp);
2175 flush_size += sp->act_size;
2178 printf("driver: flush size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)flush_size);
2181 log_add(L_WARNING, "WARNING: got empty schedule from planner");
2182 if(need_degraded==1) start_degraded_mode(&runq);
2183 start_some_dumps(&runq);
/*
 * Compute the unused bandwidth of a network interface.
 *
 * With a null ip the figure is taken over every interface returned by
 * lookup_interface(NULL): the sum of the maximum usages minus the sum of
 * the current usages.  Otherwise it is the maximum usage of ip minus its
 * current usage.
 * NOTE(review): the arithmetic shown here is unsigned, so a current usage
 * above the maximum would wrap; the lines that follow are not visible in
 * this listing -- confirm whether the result is clamped.
 */
2186 static unsigned long
2192 if (ip == (interface_t *)0) {
2194 unsigned long maxusage=0;
2195 unsigned long curusage=0;
2196 for(p = lookup_interface(NULL); p != NULL; p = p->next) {
2197 maxusage += interface_get_maxusage(p);
2198 curusage += p->curusage;
2200 res = maxusage - curusage;
2203 res = interface_get_maxusage(ip) - ip->curusage;
/*
 * Print one interface-state status line: the time stamp passed in
 * time_str followed by the name and free bandwidth of every interface
 * returned by lookup_interface(NULL).
 */
2216 printf("driver: interface-state time %s", time_str);
2218 for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
2219 printf(" if %s: free %lu", ip->name, free_kps(ip));
/* Charge kps of bandwidth to interface ip by raising its current usage. */
2229 ip->curusage += kps;
/*
 * Give kps of bandwidth back to interface ip.  The amount released must
 * not exceed what is currently charged to the interface (asserted).
 */
2233 deallocate_bandwidth(
2237 assert(kps <= ip->curusage);
2238 ip->curusage -= kps;
/*
 * Walk every configured holding disk and compute, per disk, the space not
 * yet allocated (disksize minus allocated_space).
 * NOTE(review): the lines that accumulate diff into total_free and return
 * it are not visible in this listing -- confirm against the full source.
 */
2249 total_free = (off_t)0;
2250 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2251 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2259 * We return an array of pointers to assignedhd_t. The array contains at
2260 * most one entry per holding disk. The list of pointers is terminated by
2261 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
2262 * how much diskspace to use on that disk. Later on, assign_holdingdisk
2263 * will allocate the given amount of space.
2264 * If there is not enough room on the holdingdisks, NULL is returned.
/*
 * Plan where size KB of dump data can be placed on the holding disks.
 *
 * The request is raised to at least 2*DISK_BLOCK_KB and rounded with
 * am_round() to DISK_BLOCK_KB.  Holding disks are then picked one at a
 * time: the preferred disk pref first when it still has room, otherwise
 * the disk with the fewest allocated dumpers and, among equals, the most
 * free space.  For each pick one assignedhd_t is appended to the result
 * with the amount to reserve, which includes one DISK_BLOCK_KB header per
 * chunk.  Nothing is reserved here; when the request cannot be covered
 * the partial result is released with free_assignedhd().
 */
2267 static assignedhd_t **
2271 assignedhd_t * pref)
2273 assignedhd_t **result = NULL;
2274 holdingdisk_t *minp, *hdp;
2275 int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
2278 off_t halloc, dalloc, hfree, dfree;
2280 (void)cur_idle; /* Quiet unused parameter warning */
/* Never plan for less than two blocks, and always for whole blocks. */
2282 if (size < 2*DISK_BLOCK_KB)
2283 size = 2*DISK_BLOCK_KB;
2284 size = am_round(size, (off_t)DISK_BLOCK_KB);
2287 printf("%s: want " OFF_T_FMT " K\n", debug_prefix_time(": find_diskspace"),
2288 (OFF_T_FMT_TYPE)size);
2292 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
/* used[j] flags holding disk j as already picked during this call. */
2296 used = alloc(SIZEOF(*used) * num_holdingdisks);/*disks used during this run*/
2297 memset( used, 0, (size_t)num_holdingdisks );
/* One slot per holding disk plus the terminating NULL. */
2298 result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2301 while( i < num_holdingdisks && size > (off_t)0 ) {
2302 /* find the holdingdisk with the fewest active dumpers and among
2303 * those the one with the biggest free space
2305 minp = NULL; minj = -1;
2306 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
2307 if( pref && pref->disk == hdp && !used[j] &&
2308 holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)DISK_BLOCK_KB) {
2313 else if( holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)(2*DISK_BLOCK_KB) &&
2316 holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
2317 (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
2318 hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
2325 if( !minp ) { break; } /* all holding disks are full */
2328 /* hfree = free space on the disk */
2329 hfree = minp->disksize - holdalloc(minp)->allocated_space;
2331 /* dfree = free space for data, remove 1 header for each chunksize */
2332 dfree = hfree - (((hfree-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2334 /* dalloc = space I can allocate for data */
2335 dalloc = ( dfree < size ) ? dfree : size;
2337 /* halloc = space to allocate, including 1 header for each chunksize */
2338 halloc = dalloc + (((dalloc-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2341 printf("%s: find diskspace: size " OFF_T_FMT " hf " OFF_T_FMT
2342 " df " OFF_T_FMT " da " OFF_T_FMT " ha " OFF_T_FMT "\n",
2343 debug_prefix_time(": find_diskspace"),
2344 (OFF_T_FMT_TYPE)size,
2345 (OFF_T_FMT_TYPE)hfree,
2346 (OFF_T_FMT_TYPE)dfree,
2347 (OFF_T_FMT_TYPE)dalloc,
2348 (OFF_T_FMT_TYPE)halloc);
2352 result[i] = alloc(SIZEOF(assignedhd_t));
2353 result[i]->disk = minp;
2354 result[i]->reserved = halloc;
2355 result[i]->used = (off_t)0;
2356 result[i]->destname = NULL;
2359 } /* while i < num_holdingdisks && size > 0 */
2362 if(size != (off_t)0) { /* not enough space available */
2363 printf("find diskspace: not enough diskspace. Left with "
2364 OFF_T_FMT " K\n", (OFF_T_FMT_TYPE)size);
2366 free_assignedhd(result);
2371 for( i = 0; result && result[i]; i++ ) {
2372 printf("%s: find diskspace: selected %s free " OFF_T_FMT " reserved " OFF_T_FMT " dumpers %d\n",
2373 debug_prefix_time(": find_diskspace"),
2374 holdingdisk_get_diskdir(result[i]->disk),
2375 (OFF_T_FMT_TYPE)(result[i]->disk->disksize -
2376 holdalloc(result[i]->disk)->allocated_space),
2377 (OFF_T_FMT_TYPE)result[i]->reserved,
2378 holdalloc(result[i]->disk)->allocated_dumpers);
/*
 * Attach the holding-disk assignments in holdp (as planned by
 * find_diskspace) to the schedule record of diskp and reserve the space.
 *
 * The existing sched(diskp)->holdp array is copied into a larger array
 * with room for the new entries.  When the dump already has assignments
 * and the first new entry is on the same holding disk as the last
 * existing one, the two are merged by adding the reserved amounts.  The
 * remaining entries get a destination file name and are appended; for
 * each the allocated_space of its holding disk grows by the reserved
 * amount.  Ownership of the appended entries moves to sched(diskp), which
 * is why their slots in holdp are cleared.
 * NOTE(review): the return statement is not visible in this listing; the
 * caller stores the result in sched(dp)->activehd -- confirm which index
 * is returned.
 */
2388 assignedhd_t ** holdp,
2393 char *sfn = sanitise_filename(diskp->name);
2395 assignedhd_t **new_holdp;
2398 snprintf( lvl, SIZEOF(lvl), "%d", sched(diskp)->level );
/* Space still to be covered, in whole blocks. */
2400 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
2401 (off_t)DISK_BLOCK_KB);
2403 for( c = 0; holdp[c]; c++ )
2404 (void)c; /* count number of disks */
2406 /* allocate memory for sched(diskp)->holdp */
2407 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++)
2408 (void)j; /* Quiet lint */
/* j existing entries, c new entries, one terminating NULL. */
2409 new_holdp = (assignedhd_t **)alloc(SIZEOF(assignedhd_t*)*(j+c+1));
2410 if (sched(diskp)->holdp) {
2411 memcpy(new_holdp, sched(diskp)->holdp, j * SIZEOF(*new_holdp));
2412 amfree(sched(diskp)->holdp);
2414 sched(diskp)->holdp = new_holdp;
2418 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
2419 * merge assignedhd_t's */
2421 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
2422 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
2423 holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
2424 size = (holdp[0]->reserved>size) ? (off_t)0 : size-holdp[0]->reserved;
2425 qname = quote_string(diskp->name);
2427 printf("%s: merging holding disk %s to disk %s:%s, add " OFF_T_FMT " for reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
2428 debug_prefix_time(": assign_holdingdisk"),
2429 holdingdisk_get_diskdir(sched(diskp)->holdp[j-1]->disk),
2430 diskp->host->hostname, qname,
2431 (OFF_T_FMT_TYPE)holdp[0]->reserved,
2432 (OFF_T_FMT_TYPE)sched(diskp)->holdp[j-1]->reserved,
2433 (OFF_T_FMT_TYPE)size);
2443 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
2444 for( ; holdp[i]; i++ ) {
2445 holdp[i]->destname = newvstralloc( holdp[i]->destname,
2446 holdingdisk_get_diskdir(holdp[i]->disk), "/",
2447 hd_driver_timestamp, "/",
2448 diskp->host->hostname, ".",
2451 sched(diskp)->holdp[j++] = holdp[i];
2452 holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
2453 size = (holdp[i]->reserved > size) ? (off_t)0 :
2454 (size - holdp[i]->reserved);
2455 qname = quote_string(diskp->name);
2457 printf("%s: %d assigning holding disk %s to disk %s:%s, reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
2458 debug_prefix_time(": assign_holdingdisk"),
2459 i, holdingdisk_get_diskdir(holdp[i]->disk), diskp->host->hostname, qname,
2460 (OFF_T_FMT_TYPE)holdp[i]->reserved,
2461 (OFF_T_FMT_TYPE)size);
2465 holdp[i] = NULL; /* so it doesn't get free()d... */
2467 sched(diskp)->holdp[j] = NULL;
/*
 * Reconcile the holding-disk reservations of diskp with what the dump
 * really used.
 *
 * For every assigned holding disk the difference between used and
 * reserved is applied to the allocated_space of that disk and to the
 * reservation itself, so that reserved ends up equal to used.  The sum of
 * the used amounts becomes sched(diskp)->act_size.  The cmd argument is
 * ignored.
 */
2478 assignedhd_t **holdp;
2479 off_t total = (off_t)0;
2482 char *qname, *hqname, *qdest;
2484 (void)cmd; /* Quiet unused parameter warning */
2486 qname = quote_string(diskp->name);
2487 qdest = quote_string(sched(diskp)->destname);
2489 printf("%s: %s:%s %s\n",
2490 debug_prefix_time(": adjust_diskspace"),
2491 diskp->host->hostname, qname, qdest);
2495 holdp = sched(diskp)->holdp;
2497 assert(holdp != NULL);
2499 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
/* diff is negative when less was used than had been reserved. */
2500 diff = holdp[i]->used - holdp[i]->reserved;
2501 total += holdp[i]->used;
2502 holdalloc(holdp[i]->disk)->allocated_space += diff;
2503 hqname = quote_string(holdp[i]->disk->name);
2505 printf("%s: hdisk %s done, reserved " OFF_T_FMT " used " OFF_T_FMT " diff " OFF_T_FMT " alloc " OFF_T_FMT " dumpers %d\n",
2506 debug_prefix_time(": adjust_diskspace"),
2507 holdp[i]->disk->name,
2508 (OFF_T_FMT_TYPE)holdp[i]->reserved,
2509 (OFF_T_FMT_TYPE)holdp[i]->used,
2510 (OFF_T_FMT_TYPE)diff,
2511 (OFF_T_FMT_TYPE)holdalloc(holdp[i]->disk)->allocated_space,
2512 holdalloc(holdp[i]->disk)->allocated_dumpers );
/* From here on the reservation equals the space really used. */
2515 holdp[i]->reserved += diff;
2519 sched(diskp)->act_size = total;
2522 printf("%s: after: disk %s:%s used " OFF_T_FMT "\n",
2523 debug_prefix_time(": adjust_diskspace"),
2524 diskp->host->hostname, qname,
2525 (OFF_T_FMT_TYPE)sched(diskp)->act_size);
/*
 * Drop the holding-disk image of diskp.
 *
 * The used amount of every assignment is subtracted from the
 * allocated_space of its holding disk, the holding files are unlinked
 * starting from the first destination name, the assignment list is freed
 * and act_size is reset to zero.
 * NOTE(review): the amount subtracted is the used field, while the
 * existing comment below speaks of reserved sizes; the two agree only
 * after adjust_diskspace has run -- confirm callers always do that first.
 */
2536 assignedhd_t **holdp;
2539 holdp = sched(diskp)->holdp;
2541 assert(holdp != NULL);
2543 for( i = 0; holdp[i]; i++ ) { /* for each disk */
2544 /* find all files of this dump on that disk, and subtract their
2545 * reserved sizes from the disk's allocated space
2547 holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
2550 unlink_holding_files(holdp[0]->destname); /* no need for the entire list,
2551 * because unlink_holding_files
2552 * will walk through all files
2553 * using cont_filename */
2554 free_assignedhd(sched(diskp)->holdp);
2555 sched(diskp)->holdp = NULL;
2556 sched(diskp)->act_size = (off_t)0;
/*
 * Rebuild the holding-disk assignment list for a dump that already sits
 * on the holding disks, starting from its first file destname.
 *
 * The chain of chunk files is followed through the cont_filename field of
 * each file header.  The size of every chunk, rounded up to whole KB, is
 * added to the tally of the holding disk whose directory matches the
 * directory of the chunk.  One assignedhd_t is then created for each
 * holding disk with a non-zero tally, with reserved and used both set to
 * that tally.
 * NOTE(review): the body of the directory-matching loop is not visible in
 * this listing.  If no holding disk matches, j would end at
 * num_holdingdisks and the used[j] update would be out of range --
 * confirm how that case is handled.
 */
2559 static assignedhd_t **
2566 char buffer[DISK_BLOCK_BYTES];
2568 assignedhd_t **result;
2571 int num_holdingdisks=0;
2572 char dirname[1000], *ch;
2574 char *filename = destname;
2576 memset(buffer, 0, sizeof(buffer));
2577 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
/* used[j] tallies the KB found on holding disk j. */
2580 used = alloc(SIZEOF(off_t) * num_holdingdisks);
2581 for(i=0;i<num_holdingdisks;i++)
2583 result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2585 while(filename != NULL && filename[0] != '\0') {
/* presumably strips the file name and one directory level -- confirm */
2586 strncpy(dirname, filename, 999);
2588 ch = strrchr(dirname,'/');
2590 ch = strrchr(dirname,'/');
2593 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2594 hdp = hdp->next, j++ ) {
2595 if(strcmp(dirname, holdingdisk_get_diskdir(hdp))==0) {
/* A chunk that cannot be examined is counted with size zero. */
2600 if(stat(filename, &finfo) == -1) {
2601 fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
2602 finfo.st_size = (off_t)0;
/* Bytes to KB, rounded up. */
2604 used[j] += ((off_t)finfo.st_size+(off_t)1023)/(off_t)1024;
2605 if((fd = open(filename,O_RDONLY)) == -1) {
2606 fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2607 filename, strerror(errno));
2610 if ((buflen = fullread(fd, buffer, SIZEOF(buffer))) > 0) {;
2611 parse_file_header(buffer, &file, (size_t)buflen);
/* Move on to the next chunk named in the header just parsed. */
2614 filename = file.cont_filename;
2617 for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2618 hdp = hdp->next, j++ ) {
2619 if(used[j] != (off_t)0) {
2620 result[i] = alloc(SIZEOF(assignedhd_t));
2621 result[i]->disk = hdp;
2622 result[i]->reserved = used[j];
2623 result[i]->used = used[j];
2624 result[i]->destname = stralloc(destname);
/*
 * Print one hdisk-state status line: the time stamp passed in time_str
 * followed, for every configured holding disk, by its index, its
 * unallocated space and the number of dumpers allocated to it.
 */
2642 printf("driver: hdisk-state time %s", time_str);
2644 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2645 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2646 printf(" hdisk %d: free " OFF_T_FMT " dumpers %d", dsk,
2647 (OFF_T_FMT_TYPE)diff, holdalloc(hdp)->allocated_dumpers);
/*
 * Record a failed dump of dp in the info database.
 *
 * update_info_dumper() is called with sizes and time of -1 while the
 * schedule timestamp is temporarily forced to 0; the original timestamp
 * is restored afterwards.
 */
2653 update_failed_dump_to_tape(
2657 * should simply set no_bump
2660 time_t save_timestamp = sched(dp)->timestamp;
2661 /* setting timestamp to 0 removes the current level from the
2662 * database, so that we ensure that it will not be bumped to the
2663 * next level on the next run. If we didn't do this, dumpdates or
2664 * gnutar-lists might have been updated already, and a bumped
2665 * incremental might be created. */
2666 sched(dp)->timestamp = 0;
2667 update_info_dumper(dp, (off_t)-1, (off_t)-1, (time_t)-1);
2668 sched(dp)->timestamp = save_timestamp;
2671 /* ------------------- */
/*
 * Body of the direct-to-tape dump routine, presumably dump_to_tape(dp).
 * NOTE(review): this listing omits the function header, some declarations,
 * the switch/if headers, the labels and most closing braces, so the flow
 * described here is read from the surviving statements and comments only.
 *
 * Visible sequence:
 *   1. pick an idle dumper;
 *   2. send PORT_WRITE to the taper and read back the port it chose;
 *   3. send PORT_DUMP to the dumper and mark dumper, taper, host and
 *      network bandwidth as in use;
 *   4. read the dumper result and record the outcome in failed
 *      (a value of 1 later leads to the tryagain label, 2 to failed_dumper);
 *   5. read the taper result, then record the dump in the info database,
 *      call headqueue_disk(&runq, dp), or start degraded mode;
 *   6. release the resources claimed in step 3.
 * The two visible early returns yield 2 (fatal problem); the final return
 * statement is not part of this listing.
 */
2679 off_t origsize = (off_t)0;
2680 off_t dumpsize = (off_t)0;
2681 time_t dumptime = (time_t)0;
2682 double tapetime = 0.0;
2684 int result_argc, rc;
2685 char *result_argv[MAX_ARGS+1];
/* set to 1 when the dumper itself answers TRYAGAIN, so that the taper
 * TRYAGAIN branch further down does not count the same attempt twice */
2686 int dumper_tryagain = 0;
/* quoted copy of the disk name, used in every message below */
2689 qname = quote_string(dp->name);
2690 printf("driver: dumping %s:%s directly to tape\n",
2691 dp->host->hostname, qname);
2694 /* pick a dumper and fail if there are no idle dumpers */
2696 dumper = idle_dumper();
2698 printf("driver: no idle dumpers for %s:%s.\n",
2699 dp->host->hostname, qname);
2701 log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2702 dp->host->hostname, qname);
2704 return 2; /* fatal problem */
2707 /* tell the taper to read from a port number of its choice */
2709 taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2710 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2712 printf("driver: did not get PORT from taper for %s:%s\n",
2713 dp->host->hostname, qname);
2716 return 2; /* fatal problem */
2718 /* copy port number */
/* NOTE(review): atoi() reports no conversion errors; a non-numeric reply
 * would yield port 0 here without any diagnostic */
2719 dumper->output_port = atoi(result_argv[2]);
2721 /* tell the dumper to dump to a port */
2723 dumper_cmd(dumper, PORT_DUMP, dp);
/* start_t is set 15 seconds into the future; presumably the earliest time
 * another dump may be started on this host -- confirm where start_t is read */
2724 dp->host->start_t = time(NULL) + 15;
2726 /* update statistics & print state */
/* both the taper and the chosen dumper are busy for the whole dump */
2728 taper_busy = dumper->busy = 1;
2729 dp->host->inprogress += 1;
2731 sched(dp)->timestamp = time((time_t *)0);
2732 allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2733 idle_reason = NOT_IDLE;
2737 /* wait for result from dumper */
2739 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2743 /* either eof or garbage from dumper */
2744 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2745 dumper->name, (long)dumper->pid);
2746 dumper->down = 1; /* mark it down so it isn't used again */
2747 failed = 1; /* dump failed, must still finish up with taper */
2750 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
2751 /* everything went fine */
/* values are parsed as floating point and truncated by the casts; the
 * dumper-reported dumpsize (argument 4) is deliberately not used, dumpsize
 * is instead filled in from the taper message by sscanf() further down */
2752 origsize = (off_t)atof(result_argv[3]);
2753 /*dumpsize = (off_t)atof(result_argv[4]);*/
2754 dumptime = (time_t)atof(result_argv[5]);
2757 case NO_ROOM: /* NO-ROOM <handle> */
2758 dumper_cmd(dumper, ABORT, dp);
2759 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
/* NOTE(review): this is a protocol check done with assert(); when built
 * with NDEBUG an unexpected reply from the dumper is silently accepted */
2760 assert(cmd == ABORT_FINISHED);
2762 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2764 /* dump failed, but we must still finish up with taper */
2765 /* problem with dump, possibly nonfatal, retry one time */
/* failed takes the attempt count: the checks after the taper DONE reply
 * send a value of 1 to the tryagain label and a value of 2 to failed_dumper */
2766 sched(dp)->attempted++;
2767 failed = sched(dp)->attempted;
2768 dumper_tryagain = 1;
2771 case FAILED: /* FAILED <handle> <errstr> */
2772 /* dump failed, but we must still finish up with taper */
2773 failed = 2; /* fatal problem with dump */
2778 * Note that at this point, even if the dump above failed, it may
2779 * not be a fatal failure if taper below says we can try again.
2780 * E.g. a dumper failure above may actually be the result of a
2781 * tape overflow, which in turn causes dump to see "broken pipe",
2782 * "no space on device", etc., since taper closed the port first.
/* wait for the taper verdict on the same dump */
2787 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2791 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
/* the field list above together with this count check implies that
 * result_argv is 1-based here (argument 1 is the command word, argument 5
 * the message) -- confirm against getresult() */
2792 if(result_argc != 5) {
2793 error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
/* the taper succeeded, but a dumper-side failure recorded above still wins */
2797 if(failed == 1) goto tryagain; /* dump didn't work */
2798 else if(failed == 2) goto failed_dumper;
2800 free_serial(result_argv[2]);
/* extract tape time and written size from a message that starts with
 * [sec <seconds> kb <size>, optionally preceded by a double quote.
 * NOTE(review): sscanf() stores through a cast pointer to dumpsize, which
 * is only correct if OFF_T_FMT_TYPE and off_t have the same size */
2802 if (*result_argv[5] == '"') {
2803 /* String was quoted */
2804 rc = sscanf(result_argv[5],"\"[sec %lf kb " OFF_T_FMT " ",
2805 &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
2807 /* String was not quoted */
2808 rc = sscanf(result_argv[5],"[sec %lf kb " OFF_T_FMT " ",
2809 &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
/* the condition guarding this error() is not part of the listing */
2812 error("error [malformed result: %d items matched in '%s']",
2813 rc, result_argv[5]);
2818 /* everything went fine */
2819 update_info_dumper(dp, origsize, dumpsize, dumptime);
2820 filenum = OFF_T_ATOI(result_argv[4]);
2821 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2822 /* note that update_info_dumper() must be run before
2823 update_info_taper(), since update_info_dumper overwrites
2824 tape information. */
2829 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
/* tape_left is reset to the full tape length; presumably the taper has
 * moved on to a fresh tape -- confirm */
2830 tape_left = tape_length;
/* count this attempt only if the dumper did not already count it */
2832 if(dumper_tryagain == 0) {
2833 sched(dp)->attempted++;
2834 if(sched(dp)->attempted > failed)
2835 failed = sched(dp)->attempted;
/* the condition under which the disk is put back on runq is not part of
 * the listing */
2839 headqueue_disk(&runq, dp);
2841 update_failed_dump_to_tape(dp);
2842 free_serial(result_argv[2]);
2845 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
2846 if (result_argc != 3) {
2847 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]", result_argc);
2850 fprintf(stderr, "driver: Got SPLIT_CONTINUE %s %s\n",
2851 result_argv[2], result_argv[3]);
/* the continue_port_dump label is not part of the listing; presumably it
 * loops back to wait for the next result */
2852 goto continue_port_dump;
2854 case SPLIT_NEEDNEXT:
/* NOTE(review): no argument count check is visible before arguments 2 and 3
 * are printed here, unlike the SPLIT_CONTINUE case above */
2855 fprintf(stderr, "driver: Got SPLIT_NEEDNEXT %s %s\n", result_argv[2], result_argv[3]);
2857 goto continue_port_dump;
2859 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2862 update_failed_dump_to_tape(dp);
2863 free_serial(result_argv[2]);
2864 failed = 2; /* fatal problem */
2865 start_degraded_mode(&runq);
2869 /* reset statistics & return */
/* undo the busy flags, in-progress count and bandwidth reservation that
 * were set before waiting for the dumper */
2871 taper_busy = dumper->busy = 0;
2872 dp->host->inprogress -= 1;
2874 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
/*
 * Counts the entries of queue q by walking the list from q.head through the
 * next pointers; len ends up holding the number of entries.  Presumably the
 * body of queue_length() -- the function header and the return statement are
 * not part of this listing.
 * All of the work happens in the for-clauses; the statement on the following
 * line is the (no-op) loop body.
 */
2887 for(len = 0, p = q.head; p != NULL; len++, p = p->next)
2888 (void)len; /* Quiet lint */
/*
 * Prints a one-line summary of the driver state to stdout: wall-clock time,
 * free kps and free space, taper status, number of idle dumpers, the lengths
 * of tapeq, runq and roomq, the wakeup time and the current idle reason.
 * It then calls interface_state() and holdingdisk_state() with the same
 * time string.
 * NOTE(review): the return type, the braces and the declarations of i, nidle
 * and wall_time are not part of this listing.
 */
2893 short_dump_state(void)
/* take the time string once so every line of this report carries the
 * same timestamp */
2898 wall_time = walltime_str(curclock());
2900 printf("driver: state time %s ", wall_time);
/* free_kps() is called with a null interface pointer; presumably this asks
 * for the total over all interfaces -- confirm in free_kps().
 * free_space() is cast so that it matches the OFF_T_FMT conversion. */
2901 printf("free kps: %lu space: " OFF_T_FMT " taper: ",
2902 free_kps((interface_t *)0),
2903 (OFF_T_FMT_TYPE)free_space());
/* taper status: degraded mode takes precedence over the busy flag */
2904 if(degraded_mode) printf("DOWN");
2905 else if(!taper_busy) printf("idle");
2906 else printf("writing");
/* count the dumpers among the first inparallel table entries that are not
 * busy; the initialisation of nidle is not part of this listing */
2908 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2909 printf(" idle-dumpers: %d", nidle);
2910 printf(" qlen tapeq: %d", queue_length(tapeq));
2911 printf(" runq: %d", queue_length(runq));
2912 printf(" roomq: %d", queue_length(roomq));
/* sleep_time is cast to int to match the %d conversion */
2913 printf(" wakeup: %d", (int)sleep_time);
/* idle_reason indexes the idle_strings table to get a readable name */
2914 printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2915 interface_state(wall_time);
2916 holdingdisk_state(wall_time);
2929 printf("================\n");
2930 printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
2931 printf("free kps: %lu, space: " OFF_T_FMT "\n",
2932 free_kps((interface_t *)0),
2933 (OFF_T_FMT_TYPE)free_space());
2934 if(degraded_mode) printf("taper: DOWN\n");
2935 else if(!taper_busy) printf("taper: idle\n");
2936 else printf("taper: writing %s:%s.%d est size " OFF_T_FMT "\n",
2937 taper_disk->host->hostname, taper_disk->name,
2938 sched(taper_disk)->level,
2939 sched(taper_disk)->est_size);
2940 for(i = 0; i < inparallel; i++) {
2941 dp = dmptable[i].dp;
2942 if(!dmptable[i].busy)
2943 printf("%s: idle\n", dmptable[i].name);
2945 qname = quote_string(dp->name);
2946 printf("%s: dumping %s:%s.%d est kps %d size " OFF_T_FMT " time %lu\n",
2947 dmptable[i].name, dp->host->hostname, qname, sched(dp)->level,
2948 sched(dp)->est_kps, sched(dp)->est_size, sched(dp)->est_time);
2951 dump_queue("TAPE", tapeq, 5, stdout);
2952 dump_queue("ROOM", roomq, 5, stdout);
2953 dump_queue("RUN ", runq, 5, stdout);
2954 printf("================\n");