2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: driver.c,v 1.198.2.6 2006/12/27 14:44:48 martinea Exp $
29 * controlling process for the Amanda backup system
33 * XXX possibly modify tape queue to be cognizant of how much room is left on
34 * tape. Probably not effective though, should do this in planner.
50 #include "server_util.h"
52 static disklist_t waitq, runq, tapeq, roomq;
53 static int pending_aborts;
54 static disk_t *taper_disk;
55 static int degraded_mode;
56 static off_t reserved_space;
57 static off_t total_disksize;
58 static char *dumper_program;
59 static char *chunker_program;
60 static int inparallel;
61 static int nodump = 0;
62 static off_t tape_length = (off_t)0;
63 static off_t tape_left = (off_t)0;
64 static int current_tape = 1;
65 static int conf_taperalgo;
66 static int conf_runtapes;
67 static time_t sleep_time;
68 static int idle_reason;
69 static char *driver_timestamp;
70 static char *hd_driver_timestamp;
71 static am_host_t *flushhost = NULL;
72 static int need_degraded=0;
74 static event_handle_t *dumpers_ev_time = NULL;
75 static event_handle_t *schedule_ev_read = NULL;
77 static int wait_children(int count);
78 static void wait_for_children(void);
79 static void allocate_bandwidth(interface_t *ip, unsigned long kps);
80 static int assign_holdingdisk(assignedhd_t **holdp, disk_t *diskp);
81 static void adjust_diskspace(disk_t *diskp, cmd_t cmd);
82 static void delete_diskspace(disk_t *diskp);
83 static assignedhd_t **build_diskspace(char *destname);
84 static int client_constrained(disk_t *dp);
85 static void deallocate_bandwidth(interface_t *ip, unsigned long kps);
86 static void dump_schedule(disklist_t *qp, char *str);
87 static int dump_to_tape(disk_t *dp);
88 static assignedhd_t **find_diskspace(off_t size, int *cur_idle,
89 assignedhd_t *preferred);
90 static unsigned long free_kps(interface_t *ip);
91 static off_t free_space(void);
92 static void dumper_result(disk_t *dp);
93 static void handle_dumper_result(void *);
94 static void handle_chunker_result(void *);
95 static void handle_dumpers_time(void *);
96 static void handle_taper_result(void *);
97 static void holdingdisk_state(char *time_str);
98 static dumper_t *idle_dumper(void);
99 static void interface_state(char *time_str);
100 static int queue_length(disklist_t q);
101 static disklist_t read_flush(void);
102 static void read_schedule(void *cookie);
103 static void short_dump_state(void);
104 static void startaflush(void);
105 static void start_degraded_mode(disklist_t *queuep);
106 static void start_some_dumps(disklist_t *rq);
107 static void continue_port_dumps(void);
108 static void update_failed_dump_to_tape(disk_t *);
110 static void dump_state(const char *str);
112 int main(int main_argc, char **main_argv);
114 static const char *idle_strings[] = {
117 #define IDLE_NO_DUMPERS 1
119 #define IDLE_START_WAIT 2
121 #define IDLE_NO_HOLD 3
123 #define IDLE_CLIENT_CONSTRAINED 4
124 "client-constrained",
125 #define IDLE_NO_DISKSPACE 5
127 #define IDLE_TOO_LARGE 6
129 #define IDLE_NO_BANDWIDTH 7
131 #define IDLE_TAPER_WAIT 8
145 generic_fs_stats_t fs;
147 unsigned long malloc_hist_1, malloc_size_1;
148 unsigned long malloc_hist_2, malloc_size_2;
149 unsigned long reserve = 100;
154 char *result_argv[MAX_ARGS+1];
159 int new_argc, my_argc;
160 char **new_argv, **my_argv;
164 setvbuf(stdout, (char *)NULL, (int)_IOLBF, 0);
165 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
169 dbopen(DBG_SUBDIR_SERVER);
171 atexit(wait_for_children);
173 /* Don't die when child closes pipe */
174 signal(SIGPIPE, SIG_IGN);
176 malloc_size_1 = malloc_inuse(&malloc_hist_1);
178 erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
179 set_logerror(logerror);
183 parse_server_conf(main_argc, main_argv, &new_argc, &new_argv);
187 printf("%s: pid %ld executable %s version %s\n",
188 get_pname(), (long) getpid(), my_argv[0], version());
191 config_name = stralloc(my_argv[1]);
192 config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
194 if(strncmp(my_argv[2], "nodump", 6) == 0) {
201 char my_cwd[STR_SIZE];
203 if (getcwd(my_cwd, SIZEOF(my_cwd)) == NULL) {
204 error("cannot determine current working directory");
207 config_dir = stralloc2(my_cwd, "/");
208 if ((config_name = strrchr(my_cwd, '/')) != NULL) {
209 config_name = stralloc(config_name + 1);
215 conffile = stralloc2(config_dir, CONFFILE_NAME);
216 if(read_conffile(conffile)) {
217 error("errors processing config file \"%s\"", conffile);
222 dbrename(config_name, DBG_SUBDIR_SERVER);
224 report_bad_conf_arg();
226 amfree(driver_timestamp);
227 /* read timestamp from stdin */
228 while ((line = agets(stdin)) != NULL) {
233 if ( line == NULL ) {
234 error("Did not get DATE line from planner");
237 driver_timestamp = alloc(15);
238 strncpy(driver_timestamp, &line[5], 14);
239 driver_timestamp[14] = '\0';
241 log_add(L_START,"date %s", driver_timestamp);
243 /* check that we don't run multiple dumps in one day while usetimestamps is off */
244 if(strlen(driver_timestamp) == 8) {
246 char *conf_logdir = getconf_str(CNF_LOGDIR);
247 char *logfile = vstralloc(conf_logdir, "/log.",
248 driver_timestamp, ".0", NULL);
249 char *oldlogfile = vstralloc(conf_logdir, "/oldlog/log.",
250 driver_timestamp, ".0", NULL);
251 if(access(logfile, F_OK) == 0 || access(oldlogfile, F_OK) == 0) {
252 log_add(L_WARNING, "WARNING: This is not the first amdump run today. Enable the usetimestamps option in the configuration file if you want to run amdump more than once per calendar day.");
257 hd_driver_timestamp = construct_timestamp(NULL);
260 hd_driver_timestamp = stralloc(driver_timestamp);
263 taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
264 dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
266 chunker_program = vstralloc(libexecdir, "/", "chunker", versionsuffix(),
269 conf_taperalgo = getconf_taperalgo(CNF_TAPERALGO);
270 conf_tapetype = getconf_str(CNF_TAPETYPE);
271 conf_runtapes = getconf_int(CNF_RUNTAPES);
272 tape = lookup_tapetype(conf_tapetype);
273 tape_length = tapetype_get_length(tape);
274 printf("driver: tape size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)tape_length);
276 /* start initializing: read in databases */
278 conf_diskfile = getconf_str(CNF_DISKFILE);
279 if (*conf_diskfile == '/') {
280 conf_diskfile = stralloc(conf_diskfile);
282 conf_diskfile = stralloc2(config_dir, conf_diskfile);
284 if (read_diskfile(conf_diskfile, &origq) < 0) {
285 error("could not load disklist \"%s\"", conf_diskfile);
288 amfree(conf_diskfile);
290 /* set up any configuration-dependent variables */
292 inparallel = getconf_int(CNF_INPARALLEL);
294 reserve = (unsigned long)getconf_int(CNF_RESERVE);
296 total_disksize = (off_t)0;
297 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
298 hdp->up = (void *)alloc(SIZEOF(holdalloc_t));
299 holdalloc(hdp)->allocated_dumpers = 0;
300 holdalloc(hdp)->allocated_space = (off_t)0;
302 if(get_fs_stats(holdingdisk_get_diskdir(hdp), &fs) == -1
303 || access(holdingdisk_get_diskdir(hdp), W_OK) == -1) {
304 log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
305 holdingdisk_get_diskdir(hdp), strerror(errno));
310 if(fs.avail != (off_t)-1) {
311 if(hdp->disksize > (off_t)0) {
312 if(hdp->disksize > fs.avail) {
314 "WARNING: %s: " OFF_T_FMT " KB requested, "
315 "but only " OFF_T_FMT " KB available.",
316 holdingdisk_get_diskdir(hdp),
317 (OFF_T_FMT_TYPE)hdp->disksize,
318 (OFF_T_FMT_TYPE)fs.avail);
319 hdp->disksize = fs.avail;
322 else if((fs.avail + hdp->disksize) < (off_t)0) {
324 "WARNING: %s: not " OFF_T_FMT " KB free.",
325 holdingdisk_get_diskdir(hdp), -hdp->disksize);
326 hdp->disksize = (off_t)0;
330 hdp->disksize += fs.avail;
333 printf("driver: adding holding disk %d dir %s size "
334 OFF_T_FMT " chunksize " OFF_T_FMT "\n",
335 dsk, holdingdisk_get_diskdir(hdp),
336 (OFF_T_FMT_TYPE)hdp->disksize,
337 (OFF_T_FMT_TYPE)(holdingdisk_get_chunksize(hdp)));
339 newdir = newvstralloc(newdir,
340 holdingdisk_get_diskdir(hdp), "/", hd_driver_timestamp,
342 if(!mkholdingdir(newdir)) {
343 hdp->disksize = (off_t)0;
345 total_disksize += hdp->disksize;
348 reserved_space = total_disksize * (off_t)(reserve / 100);
350 printf("reserving " OFF_T_FMT " out of " OFF_T_FMT
351 " for degraded-mode dumps\n",
352 (OFF_T_FMT_TYPE)reserved_space, (OFF_T_FMT_TYPE)free_space());
356 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
358 /* taper takes a while to get going, so start it up right away */
361 if(conf_runtapes > 0) {
362 startup_tape_process(taper_program);
363 taper_cmd(START_TAPER, driver_timestamp, NULL, 0, NULL);
366 /* fire up the dumpers now while we are waiting */
367 if(!nodump) startup_dump_processes(dumper_program, inparallel, driver_timestamp);
370 * Read schedule from stdin. Usually, this is a pipe from planner,
371 * so the effect is that we wait here for the planner to
372 * finish, but meanwhile the taper is rewinding the tape, reading
373 * the label, checking it, writing a new label and all that jazz
374 * in parallel with the planner.
380 tapeq = read_flush();
382 roomq.head = roomq.tail = NULL;
384 log_add(L_STATS, "startup time %s", walltime_str(curclock()));
386 printf("driver: start time %s inparallel %d bandwidth %lu diskspace "
387 OFF_T_FMT " ", walltime_str(curclock()), inparallel,
388 free_kps((interface_t *)0), (OFF_T_FMT_TYPE)free_space());
389 printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
390 "OBSOLETE", driver_timestamp, taperalgo2str(conf_taperalgo),
391 getconf_str(CNF_DUMPORDER));
394 /* ok, planner is done, now let's see if the tape is ready */
396 if(conf_runtapes > 0) {
397 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
399 if(cmd != TAPER_OK) {
400 /* no tape, go into degraded mode: dump to holding disk */
408 tape_left = tape_length;
411 taper_ev_read = NULL;
412 if(!need_degraded) startaflush();
415 schedule_ev_read = event_register((event_id_t)0, EV_READFD, read_schedule, NULL);
420 /* handle any remaining dumps by dumping directly to tape, if possible */
422 while(!empty(runq) && taper > 0) {
423 diskp = dequeue_disk(&runq);
424 if (diskp->to_holdingdisk == HOLD_REQUIRED) {
425 log_add(L_FAIL, "%s %s %s %d [%s]",
426 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
428 "can't dump required holdingdisk");
430 else if (!degraded_mode) {
431 int rc = dump_to_tape(diskp);
434 "%s %s %d [dump to tape failed, will try again]",
435 diskp->host->hostname,
437 sched(diskp)->level);
439 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
440 diskp->host->hostname,
442 sched(diskp)->datestamp,
443 sched(diskp)->level);
446 log_add(L_FAIL, "%s %s %s %d [%s]",
447 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
449 diskp->to_holdingdisk == HOLD_AUTO ?
450 "no more holding disk space" :
451 "can't dump no-hold disk in degraded mode");
454 short_dump_state(); /* for amstatus */
456 printf("driver: QUITTING time %s telling children to quit\n",
457 walltime_str(curclock()));
461 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
463 dumper_cmd(dumper, QUIT, NULL);
468 taper_cmd(QUIT, NULL, NULL, 0, NULL);
471 /* wait for all to die */
474 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
475 cleanup_holdingdisk(holdingdisk_get_diskdir(hdp), 0);
480 check_unfree_serial();
481 printf("driver: FINISHED time %s\n", walltime_str(curclock()));
483 log_add(L_FINISH,"date %s time %s", driver_timestamp, walltime_str(curclock()));
484 amfree(driver_timestamp);
486 free_new_argv(new_argc, new_argv);
487 amfree(dumper_program);
488 amfree(taper_program);
492 malloc_size_2 = malloc_inuse(&malloc_hist_2);
494 if(malloc_size_1 != malloc_size_2) {
495 malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
503 /* sleep up to count seconds, and wait for terminating child processes */
504 /* if count is negative, this function will not timeout */
505 /* exit once all child processes are finished or the timeout expired */
506 /* return 0 if no more children to wait for */
507 /* return 1 if some children are still alive */
/* The test for no children left is errno == ECHILD after the reap loop. */
509 wait_children(int count)
/* Reap without blocking (WNOHANG) any child that has terminated. */
521 pid = waitpid((pid_t)-1, &retstat, WNOHANG);
/* Child did not exit normally: remember the terminating signal number. */
525 if (! WIFEXITED(retstat)) {
527 code = WTERMSIG(retstat);
/* Child exited normally but with a non-zero status: remember that status. */
528 } else if (WEXITSTATUS(retstat) != 0) {
530 code = WEXITSTATUS(retstat);
/* Identify the child by pid: first the dumpers and their chunkers ... */
533 for (dumper = dmptable; dumper < dmptable + inparallel;
535 if (pid == dumper->pid) {
536 who = stralloc(dumper->name);
540 if (dumper->chunker && pid == dumper->chunker->pid) {
541 who = stralloc(dumper->chunker->name);
/* A reaped chunker gets pid -1 so it is no longer matched or signalled. */
542 dumper->chunker->pid = -1;
/* ... then the taper. */
546 if (who == NULL && pid == taper_pid) {
547 who = stralloc("taper");
/* Something worth reporting happened but the pid matched no known child. */
550 if(what != NULL && who == NULL) {
551 who = stralloc("unknown");
/* Report the abnormal termination in the Amanda log and on stdout. */
554 log_add(L_WARNING, "%s pid %u exited with %s %d\n", who,
555 (unsigned)pid, what, code);
556 printf("driver: %s pid %u exited with %s %d\n", who,
557 (unsigned)pid, what, code);
/* Keep reaping while a child was collected or waitpid was interrupted. */
/* NOTE(review): wait_errno presumably holds errno saved right after waitpid -- confirm. */
561 } while (pid > 0 || wait_errno == EINTR);
/* NOTE(review): the two tests below read errno, not the saved wait_errno; */
/* confirm that nothing called since waitpid (logging, allocation) can overwrite it. */
566 } while ((errno != ECHILD) && (count != 0));
567 return (errno != ECHILD);
/*
 * kill_children: deliver the given signal to the dumpers, their chunkers
 * and the taper, announcing each delivery on stdout first.
 * Only pids greater than 1 are signalled.  When kill() fails with ESRCH
 * (no such process) the recorded pid is reset to 0.
 * Note that the parameter name hides the signal() function inside this body.
 */
571 kill_children(int signal)
576 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
/* Dumpers marked down are skipped. */
577 if (!dumper->down && dumper->pid > 1) {
578 printf("driver: sending signal %d to %s pid %u\n", signal,
579 dumper->name, (unsigned)dumper->pid);
580 if (kill(dumper->pid, signal) == -1 && errno == ESRCH) {
/* NOTE(review): after a failed kill of the dumper this clears the pid of its */
/* chunker, not of the dumper itself -- confirm this is intended and that */
/* dumper->chunker is checked for NULL before the dereference. */
582 dumper->chunker->pid = 0;
/* The chunker attached to this dumper, if any, is signalled separately. */
584 if (dumper->chunker && dumper->chunker->pid > 1) {
585 printf("driver: sending signal %d to %s pid %u\n", signal,
586 dumper->chunker->name,
587 (unsigned)dumper->chunker->pid);
588 if (kill(dumper->chunker->pid, signal) == -1 &&
590 dumper->chunker->pid = 0;
/* Finally the taper process. */
597 printf("driver: sending signal %d to %s pid %u\n", signal,
598 "taper", (unsigned)taper_pid);
599 if (kill(taper_pid, signal) == -1 && errno == ESRCH)
/*
 * wait_for_children: shut down all child processes with increasing force.
 * main registers this function with atexit().
 *   1. send the QUIT command to every dumper, chunker and the taper that
 *      still has a pid greater than 1 and an open descriptor;
 *   2. wait_children(60);
 *   3. send SIGHUP to whatever is left and wait_children(60) again;
 *   4. send SIGKILL and call wait_children(-1), which does not time out.
 * NOTE(review): each wait_children(...) == 0 test presumably returns early
 * because no children remain -- confirm.
 */
604 wait_for_children(void)
609 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
/* Only talk to dumpers that have a process and a descriptor to write to. */
610 if (dumper->pid > 1 && dumper->fd >= 0) {
611 dumper_cmd(dumper, QUIT, NULL);
612 if (dumper->chunker && dumper->chunker->pid > 1 &&
613 dumper->chunker->fd >= 0)
614 chunker_cmd(dumper->chunker, QUIT, NULL);
/* Same polite request for the taper. */
619 if(taper_pid > 1 && taper > 0) {
620 taper_cmd(QUIT, NULL, NULL, 0, NULL);
/* Give the children time to act on QUIT. */
623 if(wait_children(60) == 0)
/* Still alive: ask more firmly. */
626 kill_children(SIGHUP);
627 if(wait_children(60) == 0)
/* Last resort: SIGKILL, then wait without a timeout. */
630 kill_children(SIGKILL);
631 if(wait_children(-1) == 0)
645 if(!degraded_mode && !taper_busy && !empty(tapeq)) {
647 datestamp = sched(tapeq.head)->datestamp;
648 switch(conf_taperalgo) {
650 dp = dequeue_disk(&tapeq);
654 while (fit != NULL) {
655 extra_tapes = (fit->tape_splitsize > (off_t)0) ?
656 conf_runtapes - current_tape : 0;
657 if(sched(fit)->act_size <= (tape_left +
658 tape_length * (off_t)extra_tapes) &&
659 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
667 if(dp) remove_disk(&tapeq, dp);
670 fit = dp = tapeq.head;
671 while (fit != NULL) {
672 if(sched(fit)->act_size > sched(dp)->act_size &&
673 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
678 if(dp) remove_disk(&tapeq, dp);
680 case ALGO_LARGESTFIT:
682 while (fit != NULL) {
683 extra_tapes = (fit->tape_splitsize > (off_t)0) ?
684 conf_runtapes - current_tape : 0;
685 if(sched(fit)->act_size <=
686 (tape_left + tape_length * (off_t)extra_tapes) &&
687 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
688 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
693 if(dp) remove_disk(&tapeq, dp);
699 remove_disk(&tapeq, dp);
702 if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
703 if(conf_taperalgo != ALGO_SMALLEST) {
705 "driver: startaflush: Using SMALLEST because nothing fit\n");
707 fit = dp = tapeq.head;
708 while (fit != NULL) {
709 if(sched(fit)->act_size < sched(dp)->act_size &&
710 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
715 if(dp) remove_disk(&tapeq, dp);
717 if(taper_ev_read == NULL) {
718 taper_ev_read = event_register((event_id_t)taper, EV_READFD,
719 handle_taper_result, NULL);
724 qname = quote_string(dp->name);
725 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
726 sched(dp)->datestamp);
727 fprintf(stderr,"driver: startaflush: %s %s %s "
728 OFF_T_FMT " " OFF_T_FMT "\n",
729 taperalgo2str(conf_taperalgo), dp->host->hostname, qname,
730 (OFF_T_FMT_TYPE)sched(taper_disk)->act_size,
731 (OFF_T_FMT_TYPE)tape_left);
732 if(sched(dp)->act_size <= tape_left)
733 tape_left -= sched(dp)->act_size;
735 tape_left = (off_t)0;
738 error("FATAL: Taper marked busy and no work found.");
741 } else if(!taper_busy && taper_ev_read != NULL) {
742 event_release(taper_ev_read);
743 taper_ev_read = NULL;
754 /* first, check if host is too busy */
756 if(dp->host->inprogress >= dp->host->maxdumps) {
760 /* next, check conflict with other dumps on same spindle */
762 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
766 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
767 if(dp2->inprogress && dp2->spindle == dp->spindle) {
779 disk_t *diskp, *delayed_diskp, *diskp_accept;
780 assignedhd_t **holdp=NULL, **holdp_accept;
781 const time_t now = time(NULL);
784 char *result_argv[MAX_ARGS+1];
790 idle_reason = IDLE_NO_DUMPERS;
793 if(dumpers_ev_time != NULL) {
794 event_release(dumpers_ev_time);
795 dumpers_ev_time = NULL;
798 for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
804 if (dumper->ev_read != NULL) {
805 event_release(dumper->ev_read);
806 dumper->ev_read = NULL;
810 * A potential problem with starting from the bottom of the dump time
811 * distribution is that a slave host will have both one of the shortest
812 * and one of the longest disks, so starting its shortest disk first will
813 * tie up the host and eliminate its longest disk from consideration the
814 * first pass through. This could cause a big delay in starting that long
815 * disk, which could drag out the whole night's dumps.
817 * While starting from the top of the dump time distribution solves the
818 * above problem, this turns out to be a bad idea, because the big dumps
819 * will almost certainly pack the holding disk completely, leaving no
820 * room for even one small dump to start. This ends up shutting out the
821 * small-end dumpers completely (they stay idle).
823 * The introduction of multiple simultaneous dumps to one host alleviates
824 * the biggest&smallest dumps problem: both can be started at the
830 delayed_diskp = NULL;
834 dumporder = getconf_str(CNF_DUMPORDER);
835 if(strlen(dumporder) > (size_t)(dumper-dmptable)) {
836 dumptype = dumporder[dumper-dmptable];
839 if(dumper-dmptable < 3)
845 for(diskp = rq->head; diskp != NULL; diskp = diskp->next) {
846 assert(diskp->host != NULL && sched(diskp) != NULL);
848 if (diskp->host->start_t > now) {
849 cur_idle = max(cur_idle, IDLE_START_WAIT);
850 if (delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
851 delayed_diskp = diskp;
852 sleep_time = diskp->host->start_t;
854 } else if(diskp->start_t > now) {
855 cur_idle = max(cur_idle, IDLE_START_WAIT);
856 if (delayed_diskp == NULL || sleep_time > diskp->start_t) {
857 delayed_diskp = diskp;
858 sleep_time = diskp->start_t;
860 } else if (diskp->host->netif->curusage > 0 &&
861 sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
862 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
863 } else if(sched(diskp)->no_space) {
864 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
865 } else if (diskp->to_holdingdisk == HOLD_NEVER) {
866 cur_idle = max(cur_idle, IDLE_NO_HOLD);
868 find_diskspace(sched(diskp)->est_size, &cur_idle, NULL)) == NULL) {
869 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
870 } else if (client_constrained(diskp)) {
871 free_assignedhd(holdp);
872 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
875 /* disk fits, dump it */
876 int accept = !diskp_accept;
879 case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
881 case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
883 case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
885 case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
887 case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
889 case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
891 default: log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
893 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
898 if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
899 if(holdp_accept) free_assignedhd(holdp_accept);
900 diskp_accept = diskp;
901 holdp_accept = holdp;
904 free_assignedhd(holdp);
908 free_assignedhd(holdp);
913 diskp = diskp_accept;
914 holdp = holdp_accept;
916 idle_reason = max(idle_reason, cur_idle);
919 * If we have no disk at this point, and there are disks that
920 * are delayed, then schedule a time event to call this dumper
921 * with the disk with the shortest delay.
923 if (diskp == NULL && delayed_diskp != NULL) {
924 assert(sleep_time > now);
926 dumpers_ev_time = event_register((event_id_t)sleep_time, EV_TIME,
927 handle_dumpers_time, &runq);
929 } else if (diskp != NULL) {
930 sched(diskp)->act_size = (off_t)0;
931 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
932 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
934 sched(diskp)->destname = newstralloc(sched(diskp)->destname,
935 sched(diskp)->holdp[0]->destname);
936 diskp->host->inprogress++; /* host is now busy */
937 diskp->inprogress = 1;
938 sched(diskp)->dumper = dumper;
939 sched(diskp)->timestamp = now;
941 dumper->busy = 1; /* dumper is now busy */
942 dumper->dp = diskp; /* link disk to dumper */
943 remove_disk(rq, diskp); /* take it off the run queue */
945 sched(diskp)->origsize = (off_t)-1;
946 sched(diskp)->dumpsize = (off_t)-1;
947 sched(diskp)->dumptime = (time_t)0;
948 sched(diskp)->tapetime = (time_t)0;
949 chunker = dumper->chunker;
950 chunker->result = LAST_TOK;
951 dumper->result = LAST_TOK;
952 startup_chunk_process(chunker,chunker_program);
953 chunker_cmd(chunker, START, (void *)driver_timestamp);
954 chunker->dumper = dumper;
955 chunker_cmd(chunker, PORT_WRITE, diskp);
956 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
958 assignedhd_t **h=NULL;
961 printf("driver: did not get PORT from %s for %s:%s\n",
962 chunker->name, diskp->host->hostname, diskp->name);
965 deallocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
966 h = sched(diskp)->holdp;
967 activehd = sched(diskp)->activehd;
968 h[activehd]->used = 0;
969 holdalloc(h[activehd]->disk)->allocated_dumpers--;
970 adjust_diskspace(diskp, DONE);
971 delete_diskspace(diskp);
972 diskp->host->inprogress--;
973 diskp->inprogress = 0;
974 sched(diskp)->dumper = NULL;
977 sched(diskp)->attempted++;
978 free_serial_dp(diskp);
979 if(sched(diskp)->attempted < 2)
980 enqueue_disk(rq, diskp);
983 dumper->ev_read = event_register((event_id_t)dumper->fd, EV_READFD,
984 handle_dumper_result, dumper);
985 chunker->ev_read = event_register((event_id_t)chunker->fd, EV_READFD,
986 handle_chunker_result, chunker);
987 dumper->output_port = atoi(result_argv[2]);
989 dumper_cmd(dumper, PORT_DUMP, diskp);
991 diskp->host->start_t = now + 15;
997 * This gets called when a dumper is delayed for some reason. It may
998 * be because a disk has a delayed start, or amanda is constrained
999 * by network or disk limits.
/*
 * handle_dumpers_time: EV_TIME event callback.  It is registered with the
 * address of the run queue as its cookie.  It releases the time event,
 * clears the saved handle and makes another attempt to start dumps from
 * that queue.
 */
1003 handle_dumpers_time(
/* This local pointer hides the file-scope runq disklist for the rest of the body. */
1006 disklist_t *runq = cookie;
/* Drop the timer registration and forget its handle before rescheduling. */
1007 event_release(dumpers_ev_time);
1008 dumpers_ev_time = NULL;
1009 start_some_dumps(runq);
1020 printf("dump of driver schedule %s:\n--------\n", str);
1022 for(dp = qp->head; dp != NULL; dp = dp->next) {
1023 qname = quote_string(dp->name);
1024 printf(" %-20s %-25s lv %d t %5lu s " OFF_T_FMT " p %d\n",
1025 dp->host->hostname, qname, sched(dp)->level,
1026 sched(dp)->est_time,
1027 (OFF_T_FMT_TYPE)sched(dp)->est_size, sched(dp)->priority);
1030 printf("--------\n");
/*
 * start_degraded_mode: rebuild the queue *queuep for running without a taper.
 * Entries are taken off the queue one by one and put on a new queue:
 *   - dumps whose level is not 0 are kept as they are;
 *   - level 0 dumps are kept while reserved_space plus the level 0
 *     estimates accepted so far plus this estimate fits in total_disksize;
 *   - otherwise the dump is switched to its degr_* values when degr_level
 *     is not -1, and an L_FAIL entry is logged when it is -1.
 * The rebuilt queue replaces *queuep; the schedule is printed before and
 * after through dump_schedule().
 */
1034 start_degraded_mode(
1035 /*@keep@*/ disklist_t *queuep)
/* Running total of the level 0 estimates accepted so far. */
1039 off_t est_full_size;
/* Stop listening for taper results. */
1042 if (taper_ev_read != NULL) {
1043 event_release(taper_ev_read);
1044 taper_ev_read = NULL;
1047 newq.head = newq.tail = 0;
1049 dump_schedule(queuep, "before start degraded mode");
1051 est_full_size = (off_t)0;
1052 while(!empty(*queuep)) {
1053 dp = dequeue_disk(queuep);
1055 qname = quote_string(dp->name);
1056 if(sched(dp)->level != 0)
1057 /* go ahead and do the disk as-is */
1058 enqueue_disk(&newq, dp);
/* Level 0 dump: keep it only while it fits beside the reserved space. */
1060 if (reserved_space + est_full_size + sched(dp)->est_size
1061 <= total_disksize) {
1062 enqueue_disk(&newq, dp);
1063 est_full_size += sched(dp)->est_size;
/* Does not fit: fall back to the degraded-mode level and its estimates. */
1065 else if(sched(dp)->degr_level != -1) {
1066 sched(dp)->level = sched(dp)->degr_level;
/* NOTE(review): dumpdate now points at the same string as degr_dumpdate. */
/* The taper DONE handling in this file calls amfree on both fields; verify */
/* that this cannot free the string twice and that the previous dumpdate */
/* string is not leaked. */
1067 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
1068 sched(dp)->est_nsize = sched(dp)->degr_nsize;
1069 sched(dp)->est_csize = sched(dp)->degr_csize;
1070 sched(dp)->est_time = sched(dp)->degr_time;
1071 sched(dp)->est_kps = sched(dp)->degr_kps;
1072 enqueue_disk(&newq, dp);
/* No degraded level available: record the failure for this disk. */
1075 log_add(L_FAIL,"%s %s %s %d [can't switch to incremental dump]",
1076 dp->host->hostname, qname, sched(dp)->datestamp,
/* Hand the rebuilt queue back to the caller. */
1083 /*@i@*/ *queuep = newq;
1086 dump_schedule(queuep, "after start degraded mode");
1091 continue_port_dumps(void)
1095 int active_dumpers=0, busy_dumpers=0, i;
1098 /* First we try to grant diskspace to some dumps waiting for it. */
1099 for( dp = roomq.head; dp; dp = ndp ) {
1101 /* find last holdingdisk used by this dump */
1102 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ) {
1103 (void)h; /* Quiet lint */
1105 /* find more space */
1106 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1107 &active_dumpers, h[i] );
1109 for(dumper = dmptable; dumper < dmptable + inparallel &&
1110 dumper->dp != dp; dumper++) {
1111 (void)dp; /* Quiet lint */
1113 assert( dumper < dmptable + inparallel );
1114 sched(dp)->activehd = assign_holdingdisk( h, dp );
1115 chunker_cmd( dumper->chunker, CONTINUE, dp );
1117 remove_disk( &roomq, dp );
1121 /* So for some disks there is less holding diskspace available than
1122 * was asked for. Possible reasons are
1123 * a) diskspace has been allocated for other dumps which are
1124 * still running or already being written to tape
1125 * b) all other dumps have been suspended due to lack of diskspace
1126 * c) this dump doesn't fit on all the holding disks
1127 * Case a) is not a problem. We just wait for the diskspace to
1128 * be freed by moving the current disk to a queue.
1129 * If case b) occurs, we have a deadlock situation. We select
1130 * a dump from the queue to be aborted and abort it. It will
1131 * be retried later dumping to disk.
1132 * If case c) is detected, the dump is aborted. Next time
1133 * it will be dumped directly to tape. Actually, case c is a special
1134 * manifestation of case b) where only one dumper is busy.
1136 for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1137 if( dumper->busy ) {
1139 if( !find_disk(&roomq, dumper->dp) ) {
1142 sched(dp)->est_size > sched(dumper->dp)->est_size ) {
1147 if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) &&
1148 ((!taper_busy && empty(tapeq)) || degraded_mode) &&
1149 pending_aborts == 0 ) { /* not case a */
1150 if( busy_dumpers == 1 ) { /* case c */
1151 sched(dp)->no_space = 1;
1154 /* At this time, dp points to the dump with the smallest est_size.
1155 * We abort that dump, hopefully not wasting too much time retrying it.
1157 remove_disk( &roomq, dp );
1158 chunker_cmd( sched(dp)->dumper->chunker, ABORT, NULL);
1159 dumper_cmd( sched(dp)->dumper, ABORT, NULL );
1166 handle_taper_result(
1173 char *result_argv[MAX_ARGS+1];
1174 int avail_tapes = 0;
1176 (void)cookie; /* Quiet unused parameter warning */
1178 assert(cookie == NULL);
1184 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
1189 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
1190 if(result_argc != 5) {
1191 error("error: [taper DONE result_argc != 5: %d", result_argc);
1195 dp = serial2disk(result_argv[2]);
1196 free_serial(result_argv[2]);
1198 filenum = OFF_T_ATOI(result_argv[4]);
1200 update_info_taper(dp, result_argv[3], filenum,
1204 delete_diskspace(dp);
1206 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1207 walltime_str(curclock()), dp->host->hostname, dp->name);
1210 amfree(sched(dp)->destname);
1211 amfree(sched(dp)->dumpdate);
1212 amfree(sched(dp)->degr_dumpdate);
1213 amfree(sched(dp)->datestamp);
1220 /* continue with those dumps waiting for diskspace */
1221 continue_port_dumps();
1224 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
1225 if (result_argc < 2) {
1226 error("error [taper TRYAGAIN result_argc < 2: %d]",
1230 dp = serial2disk(result_argv[2]);
1231 free_serial(result_argv[2]);
1232 printf("driver: taper-tryagain time %s disk %s:%s\n",
1233 walltime_str(curclock()), dp->host->hostname, dp->name);
1236 /* See how many tapes we have left, but we always
1237 retry once (why?) */
1239 if(dp->tape_splitsize > (off_t)0)
1240 avail_tapes = conf_runtapes - current_tape;
1244 if(sched(dp)->attempted > avail_tapes) {
1245 log_add(L_FAIL, "%s %s %s %d [too many taper retries]",
1246 dp->host->hostname, dp->name, sched(dp)->datestamp,
1248 printf("driver: taper failed %s %s %s, too many taper retry\n",
1249 result_argv[2], dp->host->hostname, dp->name);
1252 /* Re-insert into taper queue. */
1253 sched(dp)->attempted++;
1254 headqueue_disk(&tapeq, dp);
1257 tape_left = tape_length;
1259 /* run next thing from queue */
1264 continue_port_dumps();
1267 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
1268 if (result_argc != 3) {
1269 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]",
1275 case SPLIT_NEEDNEXT: /* SPLIT-NEEDNEXT <handle> <kb written> */
1276 if (result_argc != 3) {
1277 error("error [taper SPLIT_NEEDNEXT result_argc != 3: %d]",
1282 /* Update our tape counter and reset tape_left */
1284 tape_left = tape_length;
1286 /* Reduce the size of the dump by amount written and reduce
1287 tape_left by the amount left over */
1288 dp = serial2disk(result_argv[2]);
1289 sched(dp)->act_size -= OFF_T_ATOI(result_argv[3]);
1290 if (sched(dp)->act_size < tape_left)
1291 tape_left -= sched(dp)->act_size;
1297 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1298 dp = serial2disk(result_argv[2]);
1299 free_serial(result_argv[2]);
1300 printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1301 walltime_str(curclock()), dp->host->hostname, dp->name);
1303 log_add(L_WARNING, "Taper error: %s", result_argv[3]);
1308 log_add(L_WARNING, "Taper protocol error");
1311 * Since we received a taper error, we can't send anything more
1312 * to the taper. Go into degraded mode to try to get everything
1313 * onto disk. Later, these dumps can be flushed to a new tape.
1314 * The tape queue is zapped so that it appears empty in future
1315 * checks. If there are dumps waiting for diskspace to be freed,
1320 "going into degraded mode because of taper component error.");
1321 start_degraded_mode(&runq);
1323 tapeq.head = tapeq.tail = NULL;
1326 if(taper_ev_read != NULL) {
1327 event_release(taper_ev_read);
1328 taper_ev_read = NULL;
1330 if(cmd != TAPE_ERROR) aclose(taper);
1331 continue_port_dumps();
1335 error("driver received unexpected token (%s) from taper",
1340 * Wakeup any dumpers that are sleeping because of network
1341 * or disk constraints.
1343 start_some_dumps(&runq);
1345 } while(areads_dataready(taper));
/*
 * Walk the dumper table, dmptable[0] .. dmptable[inparallel-1], and hand
 * back the first entry that is neither busy nor marked down.
 * NOTE(review): the function header and the value returned when every
 * dumper is taken are outside this excerpt; presumably this is the body
 * of idle_dumper() -- confirm.
 */
1353 for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1354 if(!dumper->busy && !dumper->down) return dumper;
/*
 * Wrap-up of a dump to holding disk once dumper and chunker have both
 * reported (the function header is outside this excerpt; TODO confirm
 * the name against the full file).  What the visible code does:
 *
 *  - If both results are DONE, the sizes and time are recorded with
 *    update_info_dumper() and an L_STATS "estimate" line is logged.
 *  - The bandwidth reserved for the dump (est_kps) is released.
 *  - The temporary holding file is renamed; it is flagged complete only
 *    when neither the dumper nor the chunker failed.
 *  - "used" on the active holding disk becomes the total holding file
 *    size minus the space already used on the earlier holding disks,
 *    then adjust_diskspace() reconciles the reservations.
 *  - attempted is incremented first, so "attempted <= 1" selects a dump
 *    that failed on its first try: its space is deleted and it goes back
 *    on runq.  Otherwise an image larger than DISK_BLOCK_KB is queued on
 *    tapeq with attempted reset to 0.  The final delete_diskspace() call
 *    presumably sits in a trailing else branch -- confirm.
 *  - The chunker child is reaped with waitpid() and its fd closed before
 *    port dumps are continued and more dumps are started.
 */
1365 assignedhd_t **h=NULL;
1371 dumper = sched(dp)->dumper;
1372 chunker = dumper->chunker;
1376 h = sched(dp)->holdp;
1377 activehd = sched(dp)->activehd;
1379 if(dumper->result == DONE && chunker->result == DONE) {
1380 update_info_dumper(dp, sched(dp)->origsize,
1381 sched(dp)->dumpsize, sched(dp)->dumptime);
1382 log_add(L_STATS, "estimate %s %s %s %d [sec %ld nkb " OFF_T_FMT
1383 " ckb " OFF_T_FMT " kps %lu]",
1384 dp->host->hostname, dp->name, sched(dp)->datestamp,
1386 sched(dp)->est_time, (OFF_T_FMT_TYPE)sched(dp)->est_nsize,
1387 (OFF_T_FMT_TYPE)sched(dp)->est_csize,
1388 sched(dp)->est_kps);
1391 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1393 is_partial = dumper->result != DONE || chunker->result != DONE;
1394 rename_tmp_holding(sched(dp)->destname, !is_partial);
1397 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1398 dummy += h[i]->used;
1401 size = size_holding_files(sched(dp)->destname, 0);
1402 h[activehd]->used = size - dummy;
1403 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1404 adjust_diskspace(dp, DONE);
1406 sched(dp)->attempted += 1;
1408 if((dumper->result != DONE || chunker->result != DONE) &&
1409 sched(dp)->attempted <= 1) {
1410 delete_diskspace(dp);
1411 enqueue_disk(&runq, dp);
1413 else if(size > (off_t)DISK_BLOCK_KB) {
1414 sched(dp)->attempted = 0;
1415 enqueue_disk(&tapeq, dp);
1419 delete_diskspace(dp);
1423 dp->host->inprogress -= 1;
1426 waitpid(chunker->pid, NULL, 0 );
1427 aclose(chunker->fd);
1432 if (chunker->result == ABORT_FINISHED)
1434 continue_port_dumps();
1436 * Wakeup any dumpers that are sleeping because of network
1437 * or disk constraints.
1439 start_some_dumps(&runq);
/*
 * Handler for result lines arriving from a dumper; cookie is the
 * dumper_t.  Results are read with getresult() in a loop that repeats
 * while areads_dataready(dumper->fd) reports more buffered data.
 *
 * result_argv[2] carries the serial number, which serial2disk() maps
 * back to a disk; an unknown serial is fatal via error().
 *
 * Tokens handled in the visible code:
 *   DONE           - requires result_argc == 6; stores origsize and
 *                    dumptime in the schedule entry and logs completion.
 *   TRYAGAIN       - when the disk was already attempted, logs an L_FAIL
 *                    "too many dumper retry" entry; per the existing
 *                    comment it then falls through to FAILED.
 *   FAILED         - records the token in dumper->result.
 *   ABORT_FINISHED - asserts pending_aborts and records the token.
 *   anything else  - treated as EOF or garbage: the read event is
 *                    released, the dumper is marked down, and the death
 *                    is logged as L_FAIL (already attempted) or
 *                    L_WARNING.
 *
 * Afterwards DONE or FAILED is forwarded to the chunker, provided the
 * chunker is up, has an open fd and has not yet reported (result still
 * LAST_TOK).  The condition choosing between the two commands is not
 * part of this excerpt.
 */
1444 handle_dumper_result(
1447 /*static int pending_aborts = 0;*/
1448 dumper_t *dumper = cookie;
1453 char *result_argv[MAX_ARGS+1];
1455 assert(dumper != NULL);
1457 assert(dp != NULL && sched(dp) != NULL);
1463 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1466 /* result_argv[2] always contains the serial number */
1467 sdp = serial2disk(result_argv[2]);
1469 error("%s: Invalid serial number: %s", get_pname(), result_argv[2]);
1474 qname = quote_string(dp->name);
1477 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
1478 if(result_argc != 6) {
1479 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1483 /*free_serial(result_argv[2]);*/
1485 sched(dp)->origsize = OFF_T_ATOI(result_argv[3]);
1486 sched(dp)->dumptime = TIME_T_ATOI(result_argv[5]);
1488 printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1489 walltime_str(curclock()), dumper->name,
1490 dp->host->hostname, qname);
1493 dumper->result = cmd;
1497 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1499 * Requeue this disk, and fall through to the FAILED
1502 if(sched(dp)->attempted) {
1503 log_add(L_FAIL, "%s %s %s %d [too many dumper retry: %s]",
1504 dp->host->hostname, dp->name, sched(dp)->datestamp,
1505 sched(dp)->level, result_argv[3]);
1506 printf("driver: dump failed %s %s %s, too many dumper retry: %s\n",
1507 result_argv[2], dp->host->hostname, dp->name,
1511 case FAILED: /* FAILED <handle> <errstr> */
1512 /*free_serial(result_argv[2]);*/
1513 dumper->result = cmd;
1516 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1518 * We sent an ABORT from the NO-ROOM case because this dump
1519 * wasn't going to fit onto the holding disk. We now need to
1520 * clean up the remains of this image, and try to finish
1521 * other dumps that are waiting on disk space.
1523 assert(pending_aborts);
1524 /*free_serial(result_argv[2]);*/
1525 dumper->result = cmd;
1529 /* either EOF or garbage from dumper. Turn it off */
1530 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1531 dumper->name, (long)dumper->pid);
1532 if (dumper->ev_read) {
1533 event_release(dumper->ev_read);
1534 dumper->ev_read = NULL;
1538 dumper->down = 1; /* mark it down so it isn't used again */
1540 /* if it was dumping something, zap it and try again */
1541 if(sched(dp)->attempted) {
1542 log_add(L_FAIL, "%s %s %s %d [%s died]",
1543 dp->host->hostname, qname, sched(dp)->datestamp,
1544 sched(dp)->level, dumper->name);
1547 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1548 dumper->name, dp->host->hostname, qname,
1552 dumper->result = cmd;
1560 /* send the dumper result to the chunker */
1561 if(dumper->chunker->down == 0 && dumper->chunker->fd != -1 &&
1562 dumper->chunker->result == LAST_TOK) {
1564 chunker_cmd(dumper->chunker, DONE, dp);
1567 chunker_cmd(dumper->chunker, FAILED, dp);
1571 if(dumper->result != LAST_TOK && dumper->chunker->result != LAST_TOK)
1574 } while(areads_dataready(dumper->fd));
/*
 * Handler for result lines arriving from a chunker; cookie is the
 * chunker_t and chunker->dumper is its partner.  Results are read with
 * getresult() in a loop that repeats while areads_dataready(chunker->fd)
 * reports more buffered data.  result_argv[2] is the serial number; an
 * unknown serial is fatal via error().
 *
 * h / activehd cache the holding disk assignment of the disk being
 * dumped (sched(dp)->holdp and sched(dp)->activehd) when one exists.
 *
 * Tokens handled in the visible code:
 *   PARTIAL, DONE  - require result_argc == 4; dumpsize is stored, the
 *                    read event released and the token recorded.
 *   TRYAGAIN,
 *   FAILED,
 *   ABORT_FINISHED - read event released, token recorded.
 *   NO_ROOM        - the missing size in result_argv[3] is subtracted
 *                    from used, reserved, the holding disk allocated
 *                    space and the holding disk size.
 *   RQ_MORE_DISK   - the active holding disk is accounted as fully used;
 *                    if another assigned disk follows, the chunker is
 *                    told to CONTINUE on it.  Otherwise the estimate is
 *                    grown by about 5% (at least 2*DISK_BLOCK_KB) and
 *                    find_diskspace() is asked for the difference; with
 *                    no space the disk waits on roomq, else the new space
 *                    is assigned and the chunker continues.
 *   anything else  - treated as EOF or garbage from the chunker; logged
 *                    as L_FAIL (already attempted) or L_WARNING.
 *
 * NOTE(review): assert(sched(dp) != NULL) comes before the assert that
 * checks dp != NULL, and the third assert repeats the first two; the
 * later "if(dp && sched(dp) ...)" test is therefore redundant when
 * asserts are enabled -- confirm intent.
 */
1579 handle_chunker_result(
1582 /*static int pending_aborts = 0;*/
1583 chunker_t *chunker = cookie;
1584 assignedhd_t **h=NULL;
1589 char *result_argv[MAX_ARGS+1];
1594 assert(chunker != NULL);
1595 dumper = chunker->dumper;
1596 assert(dumper != NULL);
1599 assert(sched(dp) != NULL);
1600 assert(sched(dp)->destname != NULL);
1601 assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
1603 if(dp && sched(dp) && sched(dp)->holdp) {
1604 h = sched(dp)->holdp;
1605 activehd = sched(dp)->activehd;
1612 cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1615 /* result_argv[2] always contains the serial number */
1616 sdp = serial2disk(result_argv[2]);
1618 error("%s: Invalid serial number: %s", get_pname(), result_argv[2]);
1625 case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
1626 case DONE: /* DONE <handle> <dumpsize> <errstr> */
1627 if(result_argc != 4) {
1628 error("error [chunker %s result_argc != 4: %d]", cmdstr[cmd],
1632 /*free_serial(result_argv[2]);*/
1634 sched(dp)->dumpsize = (off_t)atof(result_argv[3]);
1636 qname = quote_string(dp->name);
1637 printf("driver: finished-cmd time %s %s chunked %s:%s\n",
1638 walltime_str(curclock()), chunker->name,
1639 dp->host->hostname, qname);
1643 event_release(chunker->ev_read);
1645 chunker->result = cmd;
1649 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1650 event_release(chunker->ev_read);
1652 chunker->result = cmd;
1655 case FAILED: /* FAILED <handle> <errstr> */
1656 /*free_serial(result_argv[2]);*/
1658 event_release(chunker->ev_read);
1660 chunker->result = cmd;
1664 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1665 if (!h || activehd < 0) { /* should never happen */
1666 error("!h || activehd < 0");
1669 h[activehd]->used -= OFF_T_ATOI(result_argv[3]);
1670 h[activehd]->reserved -= OFF_T_ATOI(result_argv[3]);
1671 holdalloc(h[activehd]->disk)->allocated_space -= OFF_T_ATOI(result_argv[3]);
1672 h[activehd]->disk->disksize -= OFF_T_ATOI(result_argv[3]);
1675 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1676 if (!h || activehd < 0) { /* should never happen */
1677 error("!h || activehd < 0");
1680 holdalloc(h[activehd]->disk)->allocated_dumpers--;
1681 h[activehd]->used = h[activehd]->reserved;
1682 if( h[++activehd] ) { /* There's still some allocated space left.
1683 * Tell the dumper about it. */
1684 sched(dp)->activehd++;
1685 chunker_cmd( chunker, CONTINUE, dp );
1686 } else { /* !h[++activehd] - must allocate more space */
1687 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1688 sched(dp)->est_size = (sched(dp)->act_size/(off_t)20) * (off_t)21; /* +5% */
1689 sched(dp)->est_size = am_round(sched(dp)->est_size, (off_t)DISK_BLOCK_KB);
1690 if (sched(dp)->est_size < sched(dp)->act_size + 2*DISK_BLOCK_KB)
1691 sched(dp)->est_size += 2 * DISK_BLOCK_KB;
1692 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1696 /* No diskspace available. The reason for this will be
1697 * determined in continue_port_dumps(). */
1698 enqueue_disk( &roomq, dp );
1699 continue_port_dumps();
1701 /* OK, allocate space for disk and have chunker continue */
1702 sched(dp)->activehd = assign_holdingdisk( h, dp );
1703 chunker_cmd( chunker, CONTINUE, dp );
1709 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1711 * We sent an ABORT from the NO-ROOM case because this dump
1712 * wasn't going to fit onto the holding disk. We now need to
1713 * clean up the remains of this image, and try to finish
1714 * other dumps that are waiting on disk space.
1716 /*assert(pending_aborts);*/
1718 /*free_serial(result_argv[2]);*/
1720 event_release(chunker->ev_read);
1722 chunker->result = cmd;
1727 /* either EOF or garbage from chunker. Turn it off */
1728 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1729 chunker->name, (long)chunker->pid);
1732 /* if it was dumping something, zap it and try again */
1733 if (!h || activehd < 0) { /* should never happen */
1734 error("!h || activehd < 0");
1737 qname = quote_string(dp->name);
1738 if(sched(dp)->attempted) {
1739 log_add(L_FAIL, "%s %s %s %d [%s died]",
1740 dp->host->hostname, qname, sched(dp)->datestamp,
1741 sched(dp)->level, chunker->name);
1744 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1745 chunker->name, dp->host->hostname, qname,
1752 event_release(chunker->ev_read);
1754 chunker->result = cmd;
1762 if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
1765 } while(areads_dataready(chunker->fd));
/*
 * Reader for the flush list on stdin (the function header is outside
 * this excerpt; TODO confirm the name against the full file).
 *
 * Each non-empty line must be either ENDFLUSH or
 *     FLUSH <hostname> <quoted diskname> <datestamp> <level> <filename>
 * and any syntax error is fatal via error().
 *
 * For every FLUSH line the holding file header is read with
 * get_dumpfile() and the entry is dropped (with an L_INFO log) when the
 * file is not a dump file, disagrees with the host/disk/datestamp on the
 * line, names a disk that lookup_disk() does not know, or has a dump
 * level outside 0..9.
 *
 * Accepted entries get a freshly allocated disk_t (dp1) hooked onto the
 * synthetic "FLUSHHOST" host, plus a sched_t describing the image
 * already on holding disk (act_size from size_holding_files(), holdp
 * from build_diskspace()); dp1 is then queued on the local list tq.
 *
 * NOTE(review): when build_diskspace() returns NULL the loop continues
 * after sp and its strings were allocated and after dp1 was linked into
 * flushhost->disks but before dp1->up is set -- this looks like a leak
 * of sp and leaves a disk without a schedule entry on the flush host;
 * confirm against the full source.
 */
1776 char *hostname, *diskname, *datestamp;
1780 char *inpline = NULL;
1787 tq.head = tq.tail = NULL;
1789 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1791 if (inpline[0] == '\0')
1797 skip_whitespace(s, ch); /* find the command */
1799 error("flush line %d: syntax error (no command)", line);
1803 skip_non_whitespace(s, ch);
1806 if(strcmp(command,"ENDFLUSH") == 0) {
1810 if(strcmp(command,"FLUSH") != 0) {
1811 error("flush line %d: syntax error (%s != FLUSH)", line, command);
1815 skip_whitespace(s, ch); /* find the hostname */
1817 error("flush line %d: syntax error (no hostname)", line);
1821 skip_non_whitespace(s, ch);
1824 skip_whitespace(s, ch); /* find the diskname */
1826 error("flush line %d: syntax error (no diskname)", line);
1830 skip_quoted_string(s, ch);
1831 s[-1] = '\0'; /* terminate the disk name */
1832 diskname = unquote_string(qname);
1834 skip_whitespace(s, ch); /* find the datestamp */
1836 error("flush line %d: syntax error (no datestamp)", line);
1840 skip_non_whitespace(s, ch);
1843 skip_whitespace(s, ch); /* find the level number */
1844 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1845 error("flush line %d: syntax error (bad level)", line);
1848 skip_integer(s, ch);
1850 skip_whitespace(s, ch); /* find the filename */
1852 error("flush line %d: syntax error (no filename)", line);
1856 skip_non_whitespace(s, ch);
1859 get_dumpfile(destname, &file);
1860 if( file.type != F_DUMPFILE) {
1861 if( file.type != F_CONT_DUMPFILE )
1862 log_add(L_INFO, "%s: ignoring cruft file.", destname);
1867 if(strcmp(hostname, file.name) != 0 ||
1868 strcmp(diskname, file.disk) != 0 ||
1869 strcmp(datestamp, file.datestamp) != 0) {
1870 log_add(L_INFO, "disk %s:%s not consistent with file %s",
1871 hostname, diskname, destname);
1877 dp = lookup_disk(file.name, file.disk);
1880 log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1881 destname, file.name, file.disk);
1885 if(file.dumplevel < 0 || file.dumplevel > 9) {
1886 log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1887 destname, file.dumplevel);
1891 dp1 = (disk_t *)alloc(SIZEOF(disk_t));
1893 dp1->next = dp1->prev = NULL;
1895 /* add it to the flushhost list */
1897 flushhost = alloc(SIZEOF(am_host_t));
1898 flushhost->next = NULL;
1899 flushhost->hostname = stralloc("FLUSHHOST");
1900 flushhost->up = NULL;
1901 flushhost->features = NULL;
1903 dp1->hostnext = flushhost->disks;
1904 flushhost->disks = dp1;
1906 sp = (sched_t *) alloc(SIZEOF(sched_t));
1907 sp->destname = stralloc(destname);
1908 sp->level = file.dumplevel;
1909 sp->dumpdate = NULL;
1910 sp->degr_dumpdate = NULL;
1911 sp->datestamp = stralloc(file.datestamp);
1912 sp->est_nsize = (off_t)0;
1913 sp->est_csize = (off_t)0;
1917 sp->degr_level = -1;
1919 sp->act_size = size_holding_files(destname, 0);
1920 sp->holdp = build_diskspace(destname);
1921 if(sp->holdp == NULL) continue;
1923 sp->timestamp = (time_t)0;
1925 dp1->up = (char *)sp;
1927 enqueue_disk(&tq, dp1);
/*
 * Reader for the dump schedule on stdin (the function header is outside
 * this excerpt; TODO confirm the name against the full file).
 *
 * Each non-empty line has the fields, in order:
 *     DUMP <host> <features> <quoted disk> <datestamp> <priority>
 *          <level> <dumpdate> <nsize> <csize> <time> <kps>
 * optionally followed by the degraded-mode set:
 *          <degr level> <degr dumpdate> <degr nsize> <degr csize>
 *          <degr time> <degr kps>
 * Any malformed field is fatal via error().
 *
 * Sizes are scanned into OFF_T_FMT_TYPE temporaries (nsize_, csize_,
 * degr_nsize_, degr_csize_) so the sscanf conversion matches OFF_T_FMT.
 * Times are scanned through time_p / degr_time_p, which alias the
 * time_t locals.  Every sscanf destination is passed by address.
 *
 * For each disk that lookup_disk() finds, a sched_t is filled in (the
 * estimates get DISK_BLOCK_KB added for the header and the compressed
 * size is rounded up to a DISK_BLOCK_KB multiple), the disk is moved
 * from waitq to runq, and finally dumps are started.
 */
1940 int level, line, priority;
1941 char *dumpdate, *degr_dumpdate;
1943 time_t time, degr_time;
1944 time_t *time_p = &time;
1945 time_t *degr_time_p = &degr_time;
1946 off_t nsize, csize, degr_nsize, degr_csize;
1947 unsigned long kps, degr_kps;
1948 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1952 off_t flush_size = (off_t)0;
1954 OFF_T_FMT_TYPE nsize_;
1955 OFF_T_FMT_TYPE csize_;
1956 OFF_T_FMT_TYPE degr_nsize_;
1957 OFF_T_FMT_TYPE degr_csize_;
1959 (void)cookie; /* Quiet unused parameter warning */
1961 event_release(schedule_ev_read);
1963 /* read schedule from stdin */
1965 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1966 if (inpline[0] == '\0')
1973 skip_whitespace(s, ch); /* find the command */
1975 error("schedule line %d: syntax error (no command)", line);
1979 skip_non_whitespace(s, ch);
1982 if(strcmp(command,"DUMP") != 0) {
1983 error("schedule line %d: syntax error (%s != DUMP)", line, command);
1987 skip_whitespace(s, ch); /* find the host name */
1989 error("schedule line %d: syntax error (no host name)", line);
1993 skip_non_whitespace(s, ch);
1996 skip_whitespace(s, ch); /* find the feature list */
1998 error("schedule line %d: syntax error (no feature list)", line);
2002 skip_non_whitespace(s, ch);
2005 skip_whitespace(s, ch); /* find the disk name */
2007 error("schedule line %d: syntax error (no disk name)", line);
2011 skip_quoted_string(s, ch);
2012 s[-1] = '\0'; /* terminate the disk name */
2013 diskname = unquote_string(qname);
2015 skip_whitespace(s, ch); /* find the datestamp */
2017 error("schedule line %d: syntax error (no datestamp)", line);
2021 skip_non_whitespace(s, ch);
2024 skip_whitespace(s, ch); /* find the priority number */
2025 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
2026 error("schedule line %d: syntax error (bad priority)", line);
2029 skip_integer(s, ch);
2031 skip_whitespace(s, ch); /* find the level number */
2032 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2033 error("schedule line %d: syntax error (bad level)", line);
2036 skip_integer(s, ch);
2038 skip_whitespace(s, ch); /* find the dump date */
2040 error("schedule line %d: syntax error (bad dump date)", line);
2044 skip_non_whitespace(s, ch);
2047 skip_whitespace(s, ch); /* find the native size */
2048 nsize_ = (OFF_T_FMT_TYPE)0;
2049 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, &nsize_) != 1) {
2050 error("schedule line %d: syntax error (bad nsize)", line);
2054 skip_integer(s, ch);
2056 skip_whitespace(s, ch); /* find the compressed size */
2057 csize_ = (OFF_T_FMT_TYPE)0;
2058 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, &csize_) != 1) {
2059 error("schedule line %d: syntax error (bad csize)", line);
2063 skip_integer(s, ch);
2065 skip_whitespace(s, ch); /* find the time number */
2066 if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2067 (TIME_T_FMT_TYPE *)time_p) != 1) {
2068 error("schedule line %d: syntax error (bad estimated time)", line);
2071 skip_integer(s, ch);
2073 skip_whitespace(s, ch); /* find the kps number */
2074 if(ch == '\0' || sscanf(s - 1, "%lu", &kps) != 1) {
2075 error("schedule line %d: syntax error (bad kps)", line);
2078 skip_integer(s, ch);
2080 degr_dumpdate = NULL; /* flag if degr fields found */
2081 skip_whitespace(s, ch); /* find the degr level number */
2083 if(sscanf(s - 1, "%d", &degr_level) != 1) {
2084 error("schedule line %d: syntax error (bad degr level)", line);
2087 skip_integer(s, ch);
2089 skip_whitespace(s, ch); /* find the degr dump date */
2091 error("schedule line %d: syntax error (bad degr dump date)", line);
2094 degr_dumpdate = s - 1;
2095 skip_non_whitespace(s, ch);
2098 skip_whitespace(s, ch); /* find the degr native size */
2099 degr_nsize_ = (OFF_T_FMT_TYPE)0;
2100 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, &degr_nsize_) != 1) {
2101 error("schedule line %d: syntax error (bad degr nsize)", line);
2104 degr_nsize = degr_nsize_;
2105 skip_integer(s, ch);
2107 skip_whitespace(s, ch); /* find the degr compressed size */
2108 degr_csize_ = (OFF_T_FMT_TYPE)0;
2109 if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, &degr_csize_) != 1) {
2110 error("schedule line %d: syntax error (bad degr csize)", line);
2113 degr_csize = degr_csize_;
2114 skip_integer(s, ch);
2116 skip_whitespace(s, ch); /* find the degr time number */
2117 if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2118 (TIME_T_FMT_TYPE *)degr_time_p) != 1) {
2119 error("schedule line %d: syntax error (bad degr estimated time)", line);
2122 skip_integer(s, ch);
2124 skip_whitespace(s, ch); /* find the degr kps number */
2125 if(ch == '\0' || sscanf(s - 1, "%lu", &degr_kps) != 1) {
2126 error("schedule line %d: syntax error (bad degr kps)", line);
2129 skip_integer(s, ch);
2132 dp = lookup_disk(hostname, diskname);
2135 "schedule line %d: %s:'%s' not in disklist, ignored",
2136 line, hostname, qname);
2141 sp = (sched_t *) alloc(SIZEOF(sched_t));
2144 sp->dumpdate = stralloc(dumpdate);
2145 sp->est_nsize = DISK_BLOCK_KB + nsize; /* include header */
2146 sp->est_csize = DISK_BLOCK_KB + csize; /* include header */
2147 /* round estimate to next multiple of DISK_BLOCK_KB */
2148 sp->est_csize = am_round(sp->est_csize, DISK_BLOCK_KB);
2149 sp->est_size = sp->est_csize;
2150 sp->est_time = time;
2152 sp->priority = priority;
2153 sp->datestamp = stralloc(datestamp);
2156 sp->degr_level = degr_level;
2157 sp->degr_dumpdate = stralloc(degr_dumpdate);
2158 sp->degr_nsize = DISK_BLOCK_KB + degr_nsize;
2159 sp->degr_csize = DISK_BLOCK_KB + degr_csize;
2160 /* round estimate to next multiple of DISK_BLOCK_KB */
2161 sp->degr_csize = am_round(sp->degr_csize, DISK_BLOCK_KB);
2162 sp->degr_time = degr_time;
2163 sp->degr_kps = degr_kps;
2165 sp->degr_level = -1;
2166 sp->degr_dumpdate = NULL;
2171 sp->act_size = (off_t)0;
2175 sp->timestamp = (time_t)0;
2176 sp->destname = NULL;
2179 dp->up = (char *) sp;
2180 if(dp->host->features == NULL) {
2181 dp->host->features = am_string_to_feature(features);
2183 remove_disk(&waitq, dp);
2184 enqueue_disk(&runq, dp);
2185 flush_size += sp->act_size;
2188 printf("driver: flush size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)flush_size);
2191 log_add(L_WARNING, "WARNING: got empty schedule from planner");
2192 if(need_degraded==1) start_degraded_mode(&runq);
2193 start_some_dumps(&runq);
/*
 * Report unused network bandwidth as an unsigned long.
 * With a null interface pointer, maxusage and curusage are summed over
 * every interface returned by lookup_interface(NULL) and the result is
 * computed from the two totals; otherwise the result is computed from
 * the given interface alone.
 * NOTE(review): the function name line and any guard around the
 * subtractions are outside this excerpt; the only visible caller uses
 * the name free_kps() -- confirm.
 */
2196 static unsigned long
2202 if (ip == (interface_t *)0) {
2204 unsigned long maxusage=0;
2205 unsigned long curusage=0;
2206 for(p = lookup_interface(NULL); p != NULL; p = p->next) {
2207 maxusage += interface_get_maxusage(p);
2208 curusage += p->curusage;
2210 res = maxusage - curusage;
2213 res = interface_get_maxusage(ip) - ip->curusage;
/*
 * Print one "driver: interface-state" status record: the time string
 * followed by the name and free_kps() value of every interface returned
 * by lookup_interface(NULL).
 * NOTE(review): function header not visible in this excerpt; name to be
 * confirmed.
 */
2226 printf("driver: interface-state time %s", time_str);
2228 for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
2229 printf(" if %s: free %lu", ip->name, free_kps(ip));
/*
 * Charge kps of bandwidth to interface ip by adding it to ip->curusage.
 * NOTE(review): function header not visible in this excerpt; presumably
 * the body of allocate_bandwidth(), the counterpart of
 * deallocate_bandwidth() -- confirm.
 */
2239 ip->curusage += kps;
/*
 * Give kps of bandwidth back to interface ip by subtracting it from
 * ip->curusage.  The assert checks that no more is returned than is
 * currently charged.
 */
2243 deallocate_bandwidth(
2247 assert(kps <= ip->curusage);
2248 ip->curusage -= kps;
/*
 * Walk every configured holding disk and compute, per disk, the
 * difference between its size and the space already allocated on it.
 * NOTE(review): the header, the accumulation into total_free and the
 * return statement are outside this excerpt; presumably this totals the
 * free holding space -- confirm.
 */
2259 total_free = (off_t)0;
2260 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2261 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
/*
 * find_diskspace: plan (without yet reserving) holding disk space for a
 * dump of the requested size.
 *
 * Visible behaviour:
 *  - The request is raised to at least 2*DISK_BLOCK_KB and rounded with
 *    am_round() to DISK_BLOCK_KB.
 *  - Up to one pass per holding disk is made while size is still
 *    positive.  Each pass selects a disk: the preferred disk (pref) is
 *    considered when it has not been used in this run and has at least
 *    DISK_BLOCK_KB unallocated; other disks need at least
 *    2*DISK_BLOCK_KB unallocated and are compared by number of active
 *    dumpers, then by free space.
 *  - For the selected disk, hfree is its unallocated space; dfree takes
 *    off one DISK_BLOCK_KB header per chunk; dalloc is the smaller of
 *    dfree and the remaining size; halloc adds the headers back.  A new
 *    assignedhd_t with reserved = halloc and used = 0 is stored in
 *    result[i].
 *  - If size is not zero at the end, the shortfall is printed and the
 *    partial result is released with free_assignedhd().
 *
 * cur_idle is accepted but unused.
 */
2269 * We return an array of pointers to assignedhd_t. The array contains at
2270 * most one entry per holding disk. The list of pointers is terminated by
2271 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
2272 * how much diskspace to use on that disk. Later on, assign_holdingdisk
2273 * will allocate the given amount of space.
2274 * If there is not enough room on the holdingdisks, NULL is returned.
2277 static assignedhd_t **
2281 assignedhd_t * pref)
2283 assignedhd_t **result = NULL;
2284 holdingdisk_t *minp, *hdp;
2285 int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
2288 off_t halloc, dalloc, hfree, dfree;
2290 (void)cur_idle; /* Quiet unused parameter warning */
2292 if (size < 2*DISK_BLOCK_KB)
2293 size = 2*DISK_BLOCK_KB;
2294 size = am_round(size, (off_t)DISK_BLOCK_KB);
2297 printf("%s: want " OFF_T_FMT " K\n", debug_prefix_time(": find_diskspace"),
2298 (OFF_T_FMT_TYPE)size);
2302 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2306 used = alloc(SIZEOF(*used) * num_holdingdisks);/*disks used during this run*/
2307 memset( used, 0, (size_t)num_holdingdisks );
2308 result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2311 while( i < num_holdingdisks && size > (off_t)0 ) {
2312 /* find the holdingdisk with the fewest active dumpers and among
2313 * those the one with the biggest free space
2315 minp = NULL; minj = -1;
2316 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
2317 if( pref && pref->disk == hdp && !used[j] &&
2318 holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)DISK_BLOCK_KB) {
2323 else if( holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)(2*DISK_BLOCK_KB) &&
2326 holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
2327 (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
2328 hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
2335 if( !minp ) { break; } /* all holding disks are full */
2338 /* hfree = free space on the disk */
2339 hfree = minp->disksize - holdalloc(minp)->allocated_space;
2341 /* dfree = free space for data, remove 1 header for each chunksize */
2342 dfree = hfree - (((hfree-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2344 /* dalloc = space I can allocate for data */
2345 dalloc = ( dfree < size ) ? dfree : size;
2347 /* halloc = space to allocate, including 1 header for each chunksize */
2348 halloc = dalloc + (((dalloc-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2351 printf("%s: find diskspace: size " OFF_T_FMT " hf " OFF_T_FMT
2352 " df " OFF_T_FMT " da " OFF_T_FMT " ha " OFF_T_FMT "\n",
2353 debug_prefix_time(": find_diskspace"),
2354 (OFF_T_FMT_TYPE)size,
2355 (OFF_T_FMT_TYPE)hfree,
2356 (OFF_T_FMT_TYPE)dfree,
2357 (OFF_T_FMT_TYPE)dalloc,
2358 (OFF_T_FMT_TYPE)halloc);
2362 result[i] = alloc(SIZEOF(assignedhd_t));
2363 result[i]->disk = minp;
2364 result[i]->reserved = halloc;
2365 result[i]->used = (off_t)0;
2366 result[i]->destname = NULL;
2369 } /* while i < num_holdingdisks && size > 0 */
2372 if(size != (off_t)0) { /* not enough space available */
2373 printf("find diskspace: not enough diskspace. Left with "
2374 OFF_T_FMT " K\n", (OFF_T_FMT_TYPE)size);
2376 free_assignedhd(result);
2381 for( i = 0; result && result[i]; i++ ) {
2382 printf("%s: find diskspace: selected %s free " OFF_T_FMT " reserved " OFF_T_FMT " dumpers %d\n",
2383 debug_prefix_time(": find_diskspace"),
2384 holdingdisk_get_diskdir(result[i]->disk),
2385 (OFF_T_FMT_TYPE)(result[i]->disk->disksize -
2386 holdalloc(result[i]->disk)->allocated_space),
2387 (OFF_T_FMT_TYPE)result[i]->reserved,
2388 holdalloc(result[i]->disk)->allocated_dumpers);
/*
 * assign_holdingdisk: commit a space plan (holdp, a NULL-terminated
 * array as produced by find_diskspace) to disk diskp.
 *
 * Visible behaviour:
 *  - A new pointer array big enough for the existing entries of
 *    sched(diskp)->holdp (j of them) plus the new ones (c of them) plus
 *    a terminator is allocated; existing pointers are copied over and
 *    the old array is freed.
 *  - When diskp already holds space (j > 0) and its last entry is on the
 *    same holding disk as holdp[0], the new reservation is merged into
 *    that entry instead of being appended.
 *  - Every remaining entry gets a destination file name built from the
 *    holding directory, hd_driver_timestamp and the host name (further
 *    name components are outside this excerpt), is appended to
 *    sched(diskp)->holdp, and its reservation is added to the holding
 *    disk allocated_space.
 *  - Appended slots in holdp are set to NULL because ownership moved to
 *    sched(diskp)->holdp; the combined list is NULL-terminated.
 *
 * size tracks how much of (est_size - act_size) is still uncovered and
 * is only used for the debug output visible here.
 */
2398 assignedhd_t ** holdp,
2403 char *sfn = sanitise_filename(diskp->name);
2405 assignedhd_t **new_holdp;
2408 snprintf( lvl, SIZEOF(lvl), "%d", sched(diskp)->level );
2410 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
2411 (off_t)DISK_BLOCK_KB);
2413 for( c = 0; holdp[c]; c++ )
2414 (void)c; /* count number of disks */
2416 /* allocate memory for sched(diskp)->holdp */
2417 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++)
2418 (void)j; /* Quiet lint */
2419 new_holdp = (assignedhd_t **)alloc(SIZEOF(assignedhd_t*)*(j+c+1));
2420 if (sched(diskp)->holdp) {
2421 memcpy(new_holdp, sched(diskp)->holdp, j * SIZEOF(*new_holdp));
2422 amfree(sched(diskp)->holdp);
2424 sched(diskp)->holdp = new_holdp;
2428 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
2429 * merge assignedhd_t's */
2431 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
2432 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
2433 holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
2434 size = (holdp[0]->reserved>size) ? (off_t)0 : size-holdp[0]->reserved;
2435 qname = quote_string(diskp->name);
2437 printf("%s: merging holding disk %s to disk %s:%s, add " OFF_T_FMT " for reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
2438 debug_prefix_time(": assign_holdingdisk"),
2439 holdingdisk_get_diskdir(sched(diskp)->holdp[j-1]->disk),
2440 diskp->host->hostname, qname,
2441 (OFF_T_FMT_TYPE)holdp[0]->reserved,
2442 (OFF_T_FMT_TYPE)sched(diskp)->holdp[j-1]->reserved,
2443 (OFF_T_FMT_TYPE)size);
2453 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
2454 for( ; holdp[i]; i++ ) {
2455 holdp[i]->destname = newvstralloc( holdp[i]->destname,
2456 holdingdisk_get_diskdir(holdp[i]->disk), "/",
2457 hd_driver_timestamp, "/",
2458 diskp->host->hostname, ".",
2461 sched(diskp)->holdp[j++] = holdp[i];
2462 holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
2463 size = (holdp[i]->reserved > size) ? (off_t)0 :
2464 (size - holdp[i]->reserved);
2465 qname = quote_string(diskp->name);
2467 printf("%s: %d assigning holding disk %s to disk %s:%s, reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
2468 debug_prefix_time(": assign_holdingdisk"),
2469 i, holdingdisk_get_diskdir(holdp[i]->disk), diskp->host->hostname, qname,
2470 (OFF_T_FMT_TYPE)holdp[i]->reserved,
2471 (OFF_T_FMT_TYPE)size);
2475 holdp[i] = NULL; /* so it doesn't get free()d... */
2477 sched(diskp)->holdp[j] = NULL;
/*
 * adjust_diskspace: after a dump has finished, shrink or grow each of
 * its holding disk reservations to what was really used.
 *
 * For every entry of sched(diskp)->holdp the difference (used -
 * reserved) is applied both to the holding disk allocated_space and to
 * the entry reserved field, so reserved ends up equal to used.  The sum
 * of the used amounts becomes sched(diskp)->act_size.
 *
 * cmd is accepted but unused.  sched(diskp)->holdp must not be NULL
 * (asserted).
 */
2488 assignedhd_t **holdp;
2489 off_t total = (off_t)0;
2492 char *qname, *hqname, *qdest;
2494 (void)cmd; /* Quiet unused parameter warning */
2496 qname = quote_string(diskp->name);
2497 qdest = quote_string(sched(diskp)->destname);
2499 printf("%s: %s:%s %s\n",
2500 debug_prefix_time(": adjust_diskspace"),
2501 diskp->host->hostname, qname, qdest);
2505 holdp = sched(diskp)->holdp;
2507 assert(holdp != NULL);
2509 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
2510 diff = holdp[i]->used - holdp[i]->reserved;
2511 total += holdp[i]->used;
2512 holdalloc(holdp[i]->disk)->allocated_space += diff;
2513 hqname = quote_string(holdp[i]->disk->name);
2515 printf("%s: hdisk %s done, reserved " OFF_T_FMT " used " OFF_T_FMT " diff " OFF_T_FMT " alloc " OFF_T_FMT " dumpers %d\n",
2516 debug_prefix_time(": adjust_diskspace"),
2517 holdp[i]->disk->name,
2518 (OFF_T_FMT_TYPE)holdp[i]->reserved,
2519 (OFF_T_FMT_TYPE)holdp[i]->used,
2520 (OFF_T_FMT_TYPE)diff,
2521 (OFF_T_FMT_TYPE)holdalloc(holdp[i]->disk)->allocated_space,
2522 holdalloc(holdp[i]->disk)->allocated_dumpers );
2525 holdp[i]->reserved += diff;
2529 sched(diskp)->act_size = total;
2532 printf("%s: after: disk %s:%s used " OFF_T_FMT "\n",
2533 debug_prefix_time(": adjust_diskspace"),
2534 diskp->host->hostname, qname,
2535 (OFF_T_FMT_TYPE)sched(diskp)->act_size);
/*
 * delete_diskspace: drop the holding disk image of diskp.
 *
 * The used amount of every entry in sched(diskp)->holdp is subtracted
 * from its holding disk allocated_space, the holding files are unlinked
 * starting from the first entry destname, the assignment list is freed
 * and cleared, and act_size is reset to 0.
 *
 * NOTE(review): the inline comment speaks of "reserved sizes" while the
 * code subtracts the used field; the two agree only once
 * adjust_diskspace() has run for this disk -- confirm callers.
 * NOTE(review): holdp[0] is dereferenced without a check, so an empty
 * (NULL-only) list would fault -- confirm it cannot occur.
 */
2546 assignedhd_t **holdp;
2549 holdp = sched(diskp)->holdp;
2551 assert(holdp != NULL);
2553 for( i = 0; holdp[i]; i++ ) { /* for each disk */
2554 /* find all files of this dump on that disk, and subtract their
2555 * reserved sizes from the disk's allocated space
2557 holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
2560 unlink_holding_files(holdp[0]->destname); /* no need for the entire list,
2561 * because unlink_holding_files
2562 * will walk through all files
2563 * using cont_filename */
2564 free_assignedhd(sched(diskp)->holdp);
2565 sched(diskp)->holdp = NULL;
2566 sched(diskp)->act_size = (off_t)0;
/*
 * build_diskspace: reconstruct the holding disk assignment list for an
 * image that already exists on holding disk, starting at destname.
 *
 * Visible behaviour:
 *  - One usage counter per configured holding disk is allocated.
 *  - The chain of holding files is followed: for each file its directory
 *    part is matched against the holding disk directories, its size
 *    (rounded up to whole KB from st_size) is added to that disk
 *    counter, and the file header is read to find the next file via
 *    cont_filename.  A failed stat() is reported on stderr and counted
 *    as size 0; a failed open() is reported on stderr.
 *  - For every holding disk with a non-zero counter an assignedhd_t is
 *    created with reserved == used == that counter and destname copied
 *    from the first file name.
 *
 * NOTE(review): strncpy(dirname, filename, 999) does not terminate
 * dirname when filename is 999 bytes or longer unless a line outside
 * this excerpt does so -- confirm.
 * NOTE(review): "{;" after the fullread() test contains a stray empty
 * statement; harmless.
 */
2569 static assignedhd_t **
2576 char buffer[DISK_BLOCK_BYTES];
2578 assignedhd_t **result;
2581 int num_holdingdisks=0;
2582 char dirname[1000], *ch;
2584 char *filename = destname;
2586 memset(buffer, 0, sizeof(buffer));
2587 for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2590 used = alloc(SIZEOF(off_t) * num_holdingdisks);
2591 for(i=0;i<num_holdingdisks;i++)
2593 result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2595 while(filename != NULL && filename[0] != '\0') {
2596 strncpy(dirname, filename, 999);
2598 ch = strrchr(dirname,'/');
2600 ch = strrchr(dirname,'/');
2603 for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2604 hdp = hdp->next, j++ ) {
2605 if(strcmp(dirname, holdingdisk_get_diskdir(hdp))==0) {
2610 if(stat(filename, &finfo) == -1) {
2611 fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
2612 finfo.st_size = (off_t)0;
2614 used[j] += ((off_t)finfo.st_size+(off_t)1023)/(off_t)1024;
2615 if((fd = open(filename,O_RDONLY)) == -1) {
2616 fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2617 filename, strerror(errno));
2620 if ((buflen = fullread(fd, buffer, SIZEOF(buffer))) > 0) {;
2621 parse_file_header(buffer, &file, (size_t)buflen);
2624 filename = file.cont_filename;
2627 for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2628 hdp = hdp->next, j++ ) {
2629 if(used[j] != (off_t)0) {
2630 result[i] = alloc(SIZEOF(assignedhd_t));
2631 result[i]->disk = hdp;
2632 result[i]->reserved = used[j];
2633 result[i]->used = used[j];
2634 result[i]->destname = stralloc(destname);
/*
 * Print one "driver: hdisk-state" status record: the time string
 * followed, for each configured holding disk (numbered from 0 in list
 * order), by its free space (disksize minus allocated_space) and its
 * count of allocated dumpers.
 * NOTE(review): function header not visible in this excerpt; name to be
 * confirmed.
 */
2652 printf("driver: hdisk-state time %s", time_str);
2654 for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2655 diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2656 printf(" hdisk %d: free " OFF_T_FMT " dumpers %d", dsk,
2657 (OFF_T_FMT_TYPE)diff, holdalloc(hdp)->allocated_dumpers);
/*
 * update_failed_dump_to_tape: record a failed direct-to-tape dump of dp
 * in the info database.
 *
 * The schedule timestamp is saved, forced to 0 for the duration of the
 * update_info_dumper() call (made with -1 for sizes and time), and then
 * restored, so the caller sees sched(dp)->timestamp unchanged.
 */
2663 update_failed_dump_to_tape(
2667 * should simply set no_bump
2670 time_t save_timestamp = sched(dp)->timestamp;
2671 /* setting timestamp to 0 removes the current level from the
2672 * database, so that we ensure that it will not be bumped to the
2673 * next level on the next run. If we didn't do this, dumpdates or
2674 * gnutar-lists might have been updated already, and a bumped
2675 * incremental might be created. */
2676 sched(dp)->timestamp = 0;
2677 update_info_dumper(dp, (off_t)-1, (off_t)-1, (time_t)-1);
2678 sched(dp)->timestamp = save_timestamp;
2681 /* ------------------- */
/*
 * Body of the direct-to-tape dump routine.  Its header line is not
 * visible in this excerpt (presumably dump_to_tape(dp) -- confirm).
 *
 * Flow, as far as the visible statements show:
 *   1. pick an idle dumper; report and return 2 when there is none;
 *   2. send PORT_WRITE to the taper and take the port from
 *      result_argv[2]; return 2 when no PORT reply arrives;
 *   3. send PORT_DUMP to the dumper, mark taper and dumper busy and
 *      allocate the estimated bandwidth on the host's interface;
 *   4. read the dumper's result, then the taper's result;
 *   5. on taper DONE update the info database; on TRYAGAIN either put
 *      the disk back at the head of runq or record the failure (the
 *      selecting condition is not visible here); on TAPE_ERROR record
 *      the failure and start degraded mode;
 *   6. clear the busy flags and release the bandwidth.
 *
 * `failed` is tested against 1 (go to tryagain) and 2 (go to
 * failed_dumper).  The TRY-AGAIN paths store sched(dp)->attempted in it,
 * so a second TRY-AGAIN reads as fatal -- assuming attempted starts at 0,
 * TODO confirm.
 *
 * result_argv is used 1-based: going by the case comments, [1] is the
 * command word, [2] the handle, and the remaining fields follow.
 */
2689 off_t origsize = (off_t)0;
2690 off_t dumpsize = (off_t)0;
2691 time_t dumptime = (time_t)0;
2692 double tapetime = 0.0;
2694 int result_argc, rc;
2695 char *result_argv[MAX_ARGS+1];
2696 int dumper_tryagain = 0;
2699 qname = quote_string(dp->name);
2700 printf("driver: dumping %s:%s directly to tape\n",
2701 dp->host->hostname, qname);
2704 /* pick a dumper and fail if there are no idle dumpers */
2706 dumper = idle_dumper();
2708 printf("driver: no idle dumpers for %s:%s.\n",
2709 dp->host->hostname, qname);
2711 log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2712 dp->host->hostname, qname);
2714 return 2; /* fatal problem */
2717 /* tell the taper to read from a port number of its choice */
2719 taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2720 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2722 printf("driver: did not get PORT from taper for %s:%s\n",
2723 dp->host->hostname, qname);
2726 return 2; /* fatal problem */
2728 /* copy port number */
/* NOTE(review): atoi() cannot report a malformed port string */
2729 dumper->output_port = atoi(result_argv[2]);
2731 /* tell the dumper to dump to a port */
2733 dumper_cmd(dumper, PORT_DUMP, dp);
/* start_t is set 15 seconds ahead of now; presumably this spaces out
 * the next dump start on the same host -- TODO confirm */
2734 dp->host->start_t = time(NULL) + 15;
2736 /* update statistics & print state */
2738 taper_busy = dumper->busy = 1;
2739 dp->host->inprogress += 1;
2741 sched(dp)->timestamp = time((time_t *)0);
2742 allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2743 idle_reason = NOT_IDLE;
2747 /* wait for result from dumper */
2749 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2753 /* either eof or garbage from dumper */
2754 log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2755 dumper->name, (long)dumper->pid);
2756 dumper->down = 1; /* mark it down so it isn't used again */
2757 failed = 1; /* dump failed, must still finish up with taper */
2760 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
2761 /* everything went fine */
/* the dumper's <dumpsize> field is deliberately not used: dumpsize is
 * taken from the taper's DONE message further down instead */
2762 origsize = (off_t)atof(result_argv[3]);
2763 /*dumpsize = (off_t)atof(result_argv[4]);*/
2764 dumptime = (time_t)atof(result_argv[5]);
2767 case NO_ROOM: /* NO-ROOM <handle> */
2768 dumper_cmd(dumper, ABORT, dp);
2769 cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
/* NOTE(review): this check disappears when built with NDEBUG */
2770 assert(cmd == ABORT_FINISHED);
2772 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2774 /* dump failed, but we must still finish up with taper */
2775 /* problem with dump, possibly nonfatal, retry one time */
/* dumper_tryagain tells the taper TRYAGAIN branch below not to bump
 * sched(dp)->attempted a second time for the same run */
2776 sched(dp)->attempted++;
2777 failed = sched(dp)->attempted;
2778 dumper_tryagain = 1;
2781 case FAILED: /* FAILED <handle> <errstr> */
2782 /* dump failed, but we must still finish up with taper */
2783 failed = 2; /* fatal problem with dump */
2788 * Note that at this point, even if the dump above failed, it may
2789 * not be a fatal failure if taper below says we can try again.
2790 * E.g. a dumper failure above may actually be the result of a
2791 * tape overflow, which in turn causes dump to see "broken pipe",
2792 * "no space on device", etc., since taper closed the port first.
2797 cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2801 case DONE: /* DONE <handle> <label> <tape file> <err mess> */
/* with 1-based indexing, five arguments means result_argv[1..5], so
 * the reads of [3], [4] and [5] below stay inside the reply */
2802 if(result_argc != 5) {
2803 error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
/* taper succeeded, but a dumper-side failure recorded above still wins;
 * the tryagain and failed_dumper labels are not visible in this excerpt */
2807 if(failed == 1) goto tryagain; /* dump didn't work */
2808 else if(failed == 2) goto failed_dumper;
2810 free_serial(result_argv[2]);
/* pull "[sec <tapetime> kb <dumpsize> " out of the taper's message,
 * which may or may not carry a leading double quote */
2812 dumpsize = (off_t)0;
/* NOTE(review): storing through (OFF_T_FMT_TYPE *)&dumpsize is only
 * correct if off_t and OFF_T_FMT_TYPE have the same size -- confirm */
2813 if (*result_argv[5] == '"') {
2814 /* String was quoted */
2815 rc = sscanf(result_argv[5],"\"[sec %lf kb " OFF_T_FMT " ",
2816 &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
2818 /* String was not quoted */
2819 rc = sscanf(result_argv[5],"[sec %lf kb " OFF_T_FMT " ",
2820 &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
2823 error("error [malformed result: %d items matched in '%s']",
2824 rc, result_argv[5]);
2829 /* everything went fine */
2830 update_info_dumper(dp, origsize, dumpsize, dumptime);
2831 filenum = OFF_T_ATOI(result_argv[4]);
2832 update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2833 /* note that update_info_dumper() must be run before
2834 update_info_taper(), since update_info_dumper overwrites
2835 tape information. */
2840 case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
/* the remaining-tape counter goes back to the full tape length;
 * presumably the taper continues on a fresh tape -- TODO confirm */
2841 tape_left = tape_length;
/* only count this attempt here if the dumper did not already report
 * TRY-AGAIN (and count it) above */
2843 if(dumper_tryagain == 0) {
2844 sched(dp)->attempted++;
2845 if(sched(dp)->attempted > failed)
2846 failed = sched(dp)->attempted;
2850 headqueue_disk(&runq, dp);
2852 update_failed_dump_to_tape(dp);
2853 free_serial(result_argv[2]);
2856 case SPLIT_CONTINUE: /* SPLIT_CONTINUE <handle> <new_label> */
2857 if (result_argc != 3) {
2858 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]", result_argc);
2861 fprintf(stderr, "driver: Got SPLIT_CONTINUE %s %s\n",
2862 result_argv[2], result_argv[3]);
2863 goto continue_port_dump;
2865 case SPLIT_NEEDNEXT:
/* NOTE(review): unlike SPLIT_CONTINUE, no result_argc check is visible
 * before result_argv[3] is printed -- confirm the reply always has it */
2866 fprintf(stderr, "driver: Got SPLIT_NEEDNEXT %s %s\n", result_argv[2], result_argv[3]);
2868 goto continue_port_dump;
2870 case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2873 update_failed_dump_to_tape(dp);
2874 free_serial(result_argv[2]);
2875 failed = 2; /* fatal problem */
/* a tape error switches the driver into degraded mode for runq */
2876 start_degraded_mode(&runq);
2880 /* reset statistics & return */
/* undo the busy/inprogress/bandwidth bookkeeping done before the dump */
2882 taper_busy = dumper->busy = 0;
2883 dp->host->inprogress -= 1;
2885 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
/* Walk the list from q.head along ->next, counting one per node in len.
 * The enclosing function header is not visible in this excerpt
 * (presumably queue_length(q) -- confirm). */
2898 for(len = 0, p = q.head; p != NULL; len++, p = p->next)
2899 (void)len; /* Quiet lint */
/*
 * short_dump_state: print a one-line summary of the driver state to stdout.
 *
 * The line carries, in order: the wall-clock time string, free kps, free
 * holding space, the taper status (DOWN / idle / writing), the number of
 * dumpers whose busy flag is clear, the lengths of tapeq, runq and roomq,
 * sleep_time, and the current idle reason.  It then calls
 * interface_state() and holdingdisk_state() with the same time string.
 */
2904 short_dump_state(void)
2909 wall_time = walltime_str(curclock());
2911 printf("driver: state time %s ", wall_time);
2912 printf("free kps: %lu space: " OFF_T_FMT " taper: ",
2913 free_kps((interface_t *)0),
2914 (OFF_T_FMT_TYPE)free_space());
/* degraded mode takes precedence over the taper busy flag */
2915 if(degraded_mode) printf("DOWN");
2916 else if(!taper_busy) printf("idle");
2917 else printf("writing");
/* count idle dumpers; the initialisation of nidle is not visible in
 * this excerpt */
2919 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2920 printf(" idle-dumpers: %d", nidle);
2921 printf(" qlen tapeq: %d", queue_length(tapeq));
2922 printf(" runq: %d", queue_length(runq));
2923 printf(" roomq: %d", queue_length(roomq));
2924 printf(" wakeup: %d", (int)sleep_time);
2925 printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2926 interface_state(wall_time);
2927 holdingdisk_state(wall_time);
/*
 * Verbose, multi-line driver state report on stdout, framed by
 * "================" lines: a heading with the time and `str`, free kps
 * and space, the taper status, one line per dumper slot, then the TAPE,
 * ROOM and RUN queues via dump_queue().  The enclosing function header is
 * not visible in this excerpt (presumably dump_state(str) -- confirm).
 */
2940 printf("================\n");
2941 printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
2942 printf("free kps: %lu, space: " OFF_T_FMT "\n",
2943 free_kps((interface_t *)0),
2944 (OFF_T_FMT_TYPE)free_space());
/* NOTE(review): the "writing" branch dereferences taper_disk, so it relies
 * on taper_disk being set whenever taper_busy is non-zero -- confirm.
 * It also passes est_size to OFF_T_FMT without the (OFF_T_FMT_TYPE) cast
 * used for free_space() above, and prints the disk name unquoted while
 * the per-dumper line below uses quote_string(). */
2945 if(degraded_mode) printf("taper: DOWN\n");
2946 else if(!taper_busy) printf("taper: idle\n");
2947 else printf("taper: writing %s:%s.%d est size " OFF_T_FMT "\n",
2948 taper_disk->host->hostname, taper_disk->name,
2949 sched(taper_disk)->level,
2950 sched(taper_disk)->est_size);
2951 for(i = 0; i < inparallel; i++) {
/* dp is fetched for every slot but only dereferenced for busy ones */
2952 dp = dmptable[i].dp;
2953 if(!dmptable[i].busy)
2954 printf("%s: idle\n", dmptable[i].name);
/* NOTE(review): est_kps, est_size and est_time are passed to %d,
 * OFF_T_FMT and %lu without casts; their declared types are not visible
 * here -- confirm they match.  The release of qname is not visible in
 * this excerpt either. */
2956 qname = quote_string(dp->name);
2957 printf("%s: dumping %s:%s.%d est kps %d size " OFF_T_FMT " time %lu\n",
2958 dmptable[i].name, dp->host->hostname, qname, sched(dp)->level,
2959 sched(dp)->est_kps, sched(dp)->est_size, sched(dp)->est_time);
/* each queue is dumped to stdout with 5 as dump_queue()'s third argument */
2962 dump_queue("TAPE", tapeq, 5, stdout);
2963 dump_queue("ROOM", roomq, 5, stdout);
2964 dump_queue("RUN ", runq, 5, stdout);
2965 printf("================\n");