e002992b5fda17bf5dd29d2b978c33dda4522df0
[debian/amanda] / server-src / driver.c
1 /*
2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3  * Copyright (c) 1991-1998 University of Maryland at College Park
4  * All Rights Reserved.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of U.M. not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  U.M. makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Authors: the Amanda Development Team.  Its members are listed in a
24  * file named AUTHORS, in the root directory of this distribution.
25  */
26 /*
27  * $Id: driver.c 6512 2007-05-24 17:00:24Z ian $
28  *
29  * controlling process for the Amanda backup system
30  */
31
32 /*
33  * XXX possibly modify tape queue to be cognizant of how much room is left on
34  *     tape.  Probably not effective though, should do this in planner.
35  */
36
37 #include "amanda.h"
38 #include "clock.h"
39 #include "conffile.h"
40 #include "diskfile.h"
41 #include "event.h"
42 #include "holding.h"
43 #include "infofile.h"
44 #include "logfile.h"
45 #include "fsusage.h"
46 #include "version.h"
47 #include "driverio.h"
48 #include "server_util.h"
49 #include "timestamp.h"
50
/*
 * driver_debug(level, fmt, ...):
 * forward the printf-style arguments to dbprintf() only when `level` is
 * less than or equal to the current debug_driver setting.  Wrapped in
 * do { } while (0) so it can be used as a single statement.
 */
#define driver_debug(i, ...) do {       \
        if ((i) <= debug_driver) {      \
            dbprintf(__VA_ARGS__);      \
        }                               \
} while (0)
56
/*
 * hold_debug(level, fmt, ...):
 * same as driver_debug(), but gated on the debug_holding setting.
 */
#define hold_debug(i, ...) do {         \
        if ((i) <= debug_holding) {     \
            dbprintf(__VA_ARGS__);      \
        }                               \
} while (0)
62
/*
 * File-scope driver state.
 *
 * The disklist_t queues hold the disk list entries (dle) in their various
 * stages; main() seeds waitq from the disklist and tapeq from read_flush().
 */
static disklist_t waitq;        // dle waiting estimate result
static disklist_t runq;         // dle waiting to be dumped to holding disk
static disklist_t directq;      // dle waiting to be dumped directly to tape
static disklist_t tapeq;        // dle on holding disk waiting to be written
                                //   to tape
static disklist_t roomq;        // dle waiting for more space on holding disk
static int pending_aborts;
static disk_t *taper_disk;      // dle currently handed to the taper (set in startaflush)
static int degraded_mode;
static off_t reserved_space;    // part of total_disksize kept for degraded-mode dumps
static off_t total_disksize;    // sum of the usable sizes of all holding disks
static char *dumper_program;    // path of the dumper executable
static char *chunker_program;   // path of the chunker executable
static int  inparallel;         // CNF_INPARALLEL, capped at MAX_DUMPERS in main()
static int nodump = 0;          // set when argv[2] starts with "nodump"
static off_t tape_length = (off_t)0;    // length of the configured tapetype
static off_t tape_left = (off_t)0;      // estimated room left on the current tape
static int current_tape = 0;
static int conf_taperalgo;
static int conf_runtapes;
static time_t sleep_time;
static int idle_reason;
static char *driver_timestamp;          // timestamp taken from the DATE line on stdin
static char *hd_driver_timestamp;       // timestamp used to name the holding directories
static am_host_t *flushhost = NULL;
static int need_degraded=0;             // set in main() when no taper is usable
static holdalloc_t *holdalloc;          // list of holding disks, in configuration order
static int num_holdalloc;               // number of entries in holdalloc
static event_handle_t *dumpers_ev_time = NULL;
static event_handle_t *schedule_ev_read = NULL;
static int   conf_flush_threshold_dumped;
static int   conf_flush_threshold_scheduled;
static int   conf_taperflush;
/* the three conf_* percentages above, converted to a share of tape_length */
static off_t flush_threshold_dumped;
static off_t flush_threshold_scheduled;
static off_t taperflush;
static int   schedule_done;                     // 1 if we don't wait for a
                                                //   schedule from the planner
static int   force_flush;                       // All dump are terminated, we
                                                // must now respect taper_flush
103
/*
 * Forward declarations of the file-local helpers.  All of them are static:
 * nothing here is part of a public interface.
 */
static int wait_children(int count);
static void wait_for_children(void);
static void allocate_bandwidth(netif_t *ip, unsigned long kps);
static int assign_holdingdisk(assignedhd_t **holdp, disk_t *diskp);
static void adjust_diskspace(disk_t *diskp, cmd_t cmd);
static void delete_diskspace(disk_t *diskp);
static assignedhd_t **build_diskspace(char *destname);
static int client_constrained(disk_t *dp);
static void deallocate_bandwidth(netif_t *ip, unsigned long kps);
static void dump_schedule(disklist_t *qp, char *str);
static void dump_to_tape(disk_t *dp);
static assignedhd_t **find_diskspace(off_t size, int *cur_idle,
                                        assignedhd_t *preferred);
static unsigned long free_kps(netif_t *ip);
static off_t free_space(void);
static void dumper_chunker_result(disk_t *dp);
static void dumper_taper_result(disk_t *dp);
static void file_taper_result(disk_t *dp);
/* event callbacks: each takes the opaque cookie supplied at registration */
static void handle_dumper_result(void *);
static void handle_chunker_result(void *);
static void handle_dumpers_time(void *);
static void handle_taper_result(void *);

static void holdingdisk_state(char *time_str);
static dumper_t *idle_dumper(void);
static void interface_state(char *time_str);
static int queue_length(disklist_t q);
static disklist_t read_flush(void);
static void read_schedule(void *cookie);
static void short_dump_state(void);
static void startaflush(void);
static void start_degraded_mode(disklist_t *queuep);
static void start_some_dumps(disklist_t *rq);
static void continue_port_dumps(void);
static void update_failed_dump(disk_t *);
139
/*
 * Result of tape_action().  The values are distinct bits and the callers
 * test them with bitwise AND, so several may be reported at once.
 */
typedef enum {
    TAPE_ACTION_NO_ACTION     = 0,
    TAPE_ACTION_NEW_TAPE      = (1 << 0),   /* send NEW_TAPE to the taper */
    TAPE_ACTION_NO_NEW_TAPE   = (1 << 1),   /* send NO_NEW_TAPE, go degraded */
    TAPE_ACTION_START_A_FLUSH = (1 << 2)    /* a dump from tapeq may be flushed */
} TapeAction;

/* On TAPE_ACTION_NO_NEW_TAPE the reason is passed on via *why_no_new_tape. */
static TapeAction tape_action(char **why_no_new_tape);
148
/*
 * Printable names of the idle states.  Each IDLE_* macro is defined right
 * before the string it corresponds to, so the macro value is the index of
 * its name in this table.
 * NOTE(review): presumably indexed with idle_reason -- confirm at the use site.
 */
static const char *idle_strings[] = {
#define NOT_IDLE                0
    T_("not-idle"),
#define IDLE_NO_DUMPERS         1
    T_("no-dumpers"),
#define IDLE_START_WAIT         2
    T_("start-wait"),
#define IDLE_NO_HOLD            3
    T_("no-hold"),
#define IDLE_CLIENT_CONSTRAINED 4
    T_("client-constrained"),
#define IDLE_NO_BANDWIDTH       5
    T_("no-bandwidth"),
#define IDLE_NO_DISKSPACE       6
    T_("no-diskspace")
};
165
166 int
167 main(
168     int         argc,
169     char **     argv)
170 {
171     disklist_t origq;
172     disk_t *diskp;
173     int dsk;
174     dumper_t *dumper;
175     char *newdir = NULL;
176     struct fs_usage fsusage;
177     holdingdisk_t *hdp;
178     unsigned long reserve = 100;
179     char *conf_diskfile;
180     cmd_t cmd;
181     int result_argc;
182     char **result_argv = NULL;
183     char *taper_program;
184     char *conf_tapetype;
185     tapetype_t *tape;
186     char *line;
187     char hostname[1025];
188     intmax_t kb_avail;
189     config_overwrites_t *cfg_ovr = NULL;
190     char *cfg_opt = NULL;
191     holdalloc_t *ha, *ha_last;
192
193     /*
194      * Configure program for internationalization:
195      *   1) Only set the message locale for now.
196      *   2) Set textdomain for all amanda related programs to "amanda"
197      *      We don't want to be forced to support dozens of message catalogs.
198      */  
199     setlocale(LC_MESSAGES, "C");
200     textdomain("amanda");
201
202     safe_fd(-1, 0);
203
204     setvbuf(stdout, (char *)NULL, (int)_IOLBF, 0);
205     setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
206
207     set_pname("driver");
208
209     dbopen(DBG_SUBDIR_SERVER);
210
211     atexit(wait_for_children);
212
213     /* Don't die when child closes pipe */
214     signal(SIGPIPE, SIG_IGN);
215
216     erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
217     set_logerror(logerror);
218
219     startclock();
220
221     cfg_ovr = extract_commandline_config_overwrites(&argc, &argv);
222
223     if (argc > 1)
224         cfg_opt = argv[1];
225     config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD, cfg_opt);
226     apply_config_overwrites(cfg_ovr);
227
228     conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
229     read_diskfile(conf_diskfile, &origq);
230     amfree(conf_diskfile);
231
232     if (config_errors(NULL) >= CFGERR_WARNINGS) {
233         config_print_errors();
234         if (config_errors(NULL) >= CFGERR_ERRORS) {
235             g_critical(_("errors processing config file"));
236         }
237     }
238
239     log_add(L_INFO, "%s pid %ld", get_pname(), (long)getpid());
240     g_printf(_("%s: pid %ld executable %s version %s\n"),
241            get_pname(), (long) getpid(), argv[0], version());
242
243     if(argc > 2) {
244         if(strncmp(argv[2], "nodump", 6) == 0) {
245             nodump = 1;
246         }
247     }
248
249     safe_cd(); /* do this *after* config_init */
250
251     check_running_as(RUNNING_AS_DUMPUSER);
252
253     dbrename(get_config_name(), DBG_SUBDIR_SERVER);
254
255     amfree(driver_timestamp);
256     /* read timestamp from stdin */
257     while ((line = agets(stdin)) != NULL) {
258         if (line[0] != '\0')
259             break;
260         amfree(line);
261     }
262     if ( line == NULL ) {
263       error(_("Did not get DATE line from planner"));
264       /*NOTREACHED*/
265     }
266     driver_timestamp = alloc(15);
267     strncpy(driver_timestamp, &line[5], 14);
268     driver_timestamp[14] = '\0';
269     amfree(line);
270     log_add(L_START,_("date %s"), driver_timestamp);
271
272     gethostname(hostname, SIZEOF(hostname));
273     log_add(L_STATS,_("hostname %s"), hostname);
274
275     /* check that we don't do many dump in a day and usetimestamps is off */
276     if(strlen(driver_timestamp) == 8) {
277         if (!nodump) {
278             char *conf_logdir = getconf_str(CNF_LOGDIR);
279             char *logfile    = vstralloc(conf_logdir, "/log.",
280                                          driver_timestamp, ".0", NULL);
281             char *oldlogfile = vstralloc(conf_logdir, "/oldlog/log.",
282                                          driver_timestamp, ".0", NULL);
283             if(access(logfile, F_OK) == 0 || access(oldlogfile, F_OK) == 0) {
284                 log_add(L_WARNING, _("WARNING: This is not the first amdump run today. Enable the usetimestamps option in the configuration file if you want to run amdump more than once per calendar day."));
285             }
286             amfree(oldlogfile);
287             amfree(logfile);
288         }
289         hd_driver_timestamp = get_timestamp_from_time(0);
290     }
291     else {
292         hd_driver_timestamp = stralloc(driver_timestamp);
293     }
294
295     taper_program = vstralloc(amlibexecdir, "/", "taper", versionsuffix(), NULL);
296     dumper_program = vstralloc(amlibexecdir, "/", "dumper", versionsuffix(),
297                                NULL);
298     chunker_program = vstralloc(amlibexecdir, "/", "chunker", versionsuffix(),
299                                NULL);
300
301     conf_taperalgo = getconf_taperalgo(CNF_TAPERALGO);
302     conf_tapetype = getconf_str(CNF_TAPETYPE);
303     conf_runtapes = getconf_int(CNF_RUNTAPES);
304     tape = lookup_tapetype(conf_tapetype);
305     tape_length = tapetype_get_length(tape);
306     g_printf("driver: tape size %lld\n", (long long)tape_length);
307     conf_flush_threshold_dumped = getconf_int(CNF_FLUSH_THRESHOLD_DUMPED);
308     conf_flush_threshold_scheduled = getconf_int(CNF_FLUSH_THRESHOLD_SCHEDULED);
309     conf_taperflush = getconf_int(CNF_TAPERFLUSH);
310
311     flush_threshold_dumped = (conf_flush_threshold_dumped * tape_length) / 100;
312     flush_threshold_scheduled = (conf_flush_threshold_scheduled * tape_length) / 100;
313     taperflush = (conf_taperflush *tape_length) / 100;
314
315     driver_debug(1, _("flush_threshold_dumped: %lld\n"), (long long)flush_threshold_dumped);
316     driver_debug(1, _("flush_threshold_scheduled: %lld\n"), (long long)flush_threshold_scheduled);
317     driver_debug(1, _("taperflush: %lld\n"), (long long)taperflush);
318
319     /* set up any configuration-dependent variables */
320
321     inparallel  = getconf_int(CNF_INPARALLEL);
322
323     reserve = (unsigned long)getconf_int(CNF_RESERVE);
324
325     total_disksize = (off_t)0;
326     ha_last = NULL;
327     num_holdalloc = 0;
328     for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = holdingdisk_next(hdp), dsk++) {
329         ha = alloc(SIZEOF(holdalloc_t));
330         num_holdalloc++;
331
332         /* link the list in the same order as getconf_holdingdisks's results */
333         ha->next = NULL;
334         if (ha_last == NULL)
335             holdalloc = ha;
336         else
337             ha_last->next = ha;
338         ha_last = ha;
339
340         ha->hdisk = hdp;
341         ha->allocated_dumpers = 0;
342         ha->allocated_space = (off_t)0;
343         ha->disksize = holdingdisk_get_disksize(hdp);
344
345         /* get disk size */
346         if(get_fs_usage(holdingdisk_get_diskdir(hdp), NULL, &fsusage) == -1
347            || access(holdingdisk_get_diskdir(hdp), W_OK) == -1) {
348             log_add(L_WARNING, _("WARNING: ignoring holding disk %s: %s\n"),
349                     holdingdisk_get_diskdir(hdp), strerror(errno));
350             ha->disksize = 0L;
351             continue;
352         }
353
354         /* do the division first to avoid potential integer overflow */
355         if (fsusage.fsu_bavail_top_bit_set)
356             kb_avail = 0;
357         else
358             kb_avail = fsusage.fsu_bavail / 1024 * fsusage.fsu_blocksize;
359
360         if(ha->disksize > (off_t)0) {
361             if(ha->disksize > kb_avail) {
362                 log_add(L_WARNING,
363                         _("WARNING: %s: %lld KB requested, "
364                         "but only %lld KB available."),
365                         holdingdisk_get_diskdir(hdp),
366                         (long long)ha->disksize,
367                         (long long)kb_avail);
368                         ha->disksize = kb_avail;
369             }
370         }
371         /* ha->disksize is negative; use all but that amount */
372         else if(kb_avail < -ha->disksize) {
373             log_add(L_WARNING,
374                     _("WARNING: %s: not %lld KB free."),
375                     holdingdisk_get_diskdir(hdp),
376                     (long long)-ha->disksize);
377             ha->disksize = (off_t)0;
378             continue;
379         }
380         else
381             ha->disksize += kb_avail;
382
383         g_printf(_("driver: adding holding disk %d dir %s size %lld chunksize %lld\n"),
384                dsk, holdingdisk_get_diskdir(hdp),
385                (long long)ha->disksize,
386                (long long)(holdingdisk_get_chunksize(hdp)));
387
388         newdir = newvstralloc(newdir,
389                               holdingdisk_get_diskdir(hdp), "/", hd_driver_timestamp,
390                               NULL);
391         if(!mkholdingdir(newdir)) {
392             ha->disksize = (off_t)0;
393         }
394         total_disksize += ha->disksize;
395     }
396
397     reserved_space = total_disksize * (off_t)(reserve / 100);
398
399     g_printf(_("reserving %lld out of %lld for degraded-mode dumps\n"),
400            (long long)reserved_space, (long long)free_space());
401
402     amfree(newdir);
403
404     if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
405
406     /* taper takes a while to get going, so start it up right away */
407
408     init_driverio();
409     if(conf_runtapes > 0) {
410         startup_tape_process(taper_program);
411         taper_cmd(START_TAPER, driver_timestamp, NULL, 0, NULL);
412     }
413
414     /* fire up the dumpers now while we are waiting */
415     if(!nodump) startup_dump_processes(dumper_program, inparallel, driver_timestamp);
416
417     /*
418      * Read schedule from stdin.  Usually, this is a pipe from planner,
419      * so the effect is that we wait here for the planner to
420      * finish, but meanwhile the taper is rewinding the tape, reading
421      * the label, checking it, writing a new label and all that jazz
422      * in parallel with the planner.
423      */
424
425     runq.head = NULL;
426     runq.tail = NULL;
427     directq.head = NULL;
428     directq.tail = NULL;
429     waitq = origq;
430     taper_state = TAPER_STATE_DEFAULT;
431     tapeq = read_flush();
432
433     roomq.head = roomq.tail = NULL;
434
435     log_add(L_STATS, _("startup time %s"), walltime_str(curclock()));
436
437     g_printf(_("driver: start time %s inparallel %d bandwidth %lu diskspace %lld "), walltime_str(curclock()), inparallel,
438            free_kps(NULL), (long long)free_space());
439     g_printf(_(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n"),
440            "OBSOLETE", driver_timestamp, taperalgo2str(conf_taperalgo),
441            getconf_str(CNF_DUMPORDER));
442     fflush(stdout);
443
444     /* ok, planner is done, now lets see if the tape is ready */
445
446     if (conf_runtapes > 0) {
447         cmd = getresult(taper, 1, &result_argc, &result_argv);
448         if (cmd != TAPER_OK) {
449             /* no tape, go into degraded mode: dump to holding disk */
450             need_degraded = 1;
451         }
452     } else {
453         need_degraded = 1;
454     }
455
456     tape_left = tape_length;
457     taper_busy = 0;
458     amfree(taper_input_error);
459     amfree(taper_tape_error);
460     taper_disk = NULL;
461     taper_ev_read = NULL;
462
463     schedule_done = nodump;
464     force_flush = 0;
465
466     if(!need_degraded) startaflush();
467
468     if(!nodump)
469         schedule_ev_read = event_register((event_id_t)0, EV_READFD, read_schedule, NULL);
470
471     short_dump_state();
472     event_loop(0);
473
474     force_flush = 1;
475
476     /* mv runq to directq */
477     while (!empty(runq)) {
478         diskp = dequeue_disk(&runq);
479         headqueue_disk(&directq, diskp);
480     }
481
482     /* handle any remaining dumps by dumping directly to tape, if possible */
483     while(!empty(directq) && taper > 0) {
484         time_t  sleep_time  = 100000000;
485         disk_t *sleep_diskp = NULL;
486         time_t  now         = time(0);
487
488         /* Find one we can do immediately or the sonner */
489         for (diskp = directq.head; diskp != NULL; diskp = diskp->next) {
490             if (diskp->to_holdingdisk == HOLD_REQUIRED ||
491                 degraded_mode) {
492                 sleep_time = 0;
493                 sleep_diskp = diskp;
494             } else if (diskp->host->start_t - now < sleep_time &&
495                        diskp->start_t -now < sleep_time) {
496                 if (diskp->host->start_t > diskp->start_t)
497                     sleep_time = diskp->host->start_t - now;
498                 else
499                     sleep_time = diskp->start_t - now;
500                 sleep_diskp = diskp;
501             }
502         }
503         diskp = sleep_diskp;
504         if (sleep_time > 0)
505             sleep(sleep_time);
506         remove_disk(&directq, diskp);
507
508         if (diskp->to_holdingdisk == HOLD_REQUIRED) {
509             char *qname = quote_string(diskp->name);
510             log_add(L_FAIL, "%s %s %s %d [%s]",
511                 diskp->host->hostname, qname, sched(diskp)->datestamp,
512                 sched(diskp)->level,
513                 _("can't dump required holdingdisk"));
514             amfree(qname);
515         }
516         else if (!degraded_mode) {
517             taper_state |= TAPER_STATE_DUMP_TO_TAPE;
518             dump_to_tape(diskp);
519             event_loop(0);
520             taper_state &= ~TAPER_STATE_DUMP_TO_TAPE;
521         }
522         else {
523             char *qname = quote_string(diskp->name);
524             log_add(L_FAIL, "%s %s %s %d [%s]",
525                 diskp->host->hostname, qname, sched(diskp)->datestamp,
526                 sched(diskp)->level,
527                 num_holdalloc == 0 ?
528                     _("can't do degraded dump without holding disk") :
529                 diskp->to_holdingdisk != HOLD_NEVER ?
530                     _("out of holding space in degraded mode") :
531                     _("can't dump 'holdingdisk never' dle in degraded mode"));
532             amfree(qname);
533         }
534     }
535
536     /* fill up the tape or start new one for taperflush */
537     startaflush();
538     event_loop(0);
539
540     short_dump_state();                         /* for amstatus */
541
542     g_printf(_("driver: QUITTING time %s telling children to quit\n"),
543            walltime_str(curclock()));
544     fflush(stdout);
545
546     if(!nodump) {
547         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
548             if(dumper->fd >= 0)
549                 dumper_cmd(dumper, QUIT, NULL, NULL);
550         }
551     }
552
553     if(taper >= 0) {
554         taper_cmd(QUIT, NULL, NULL, 0, NULL);
555     }
556
557     /* wait for all to die */
558     wait_children(600);
559
560     /* cleanup */
561     holding_cleanup(NULL, NULL);
562
563     amfree(newdir);
564
565     check_unfree_serial();
566     g_printf(_("driver: FINISHED time %s\n"), walltime_str(curclock()));
567     fflush(stdout);
568     log_add(L_FINISH,_("date %s time %s"), driver_timestamp, walltime_str(curclock()));
569     log_add(L_INFO, "pid-done %ld", (long)getpid());
570     amfree(driver_timestamp);
571
572     amfree(dumper_program);
573     amfree(taper_program);
574     if (result_argv)
575         g_strfreev(result_argv);
576
577     dbclose();
578
579     return 0;
580 }
581
582 /* sleep up to count seconds, and wait for terminating child process */
583 /* if sleep is negative, this function will not timeout              */
584 /* exit once all child process are finished or the timout expired    */
585 /* return 0 if no more children to wait                              */
586 /* return 1 if some children are still alive                         */
587 static int
588 wait_children(int count)
589 {
590     pid_t     pid;
591     amwait_t  retstat;
592     char     *who;
593     char     *what;
594     int       code=0;
595     dumper_t *dumper;
596     int       wait_errno;
597
598     do {
599         do {
600             pid = waitpid((pid_t)-1, &retstat, WNOHANG);
601             wait_errno = errno;
602             if (pid > 0) {
603                 what = NULL;
604                 if (! WIFEXITED(retstat)) {
605                     what = _("signal");
606                     code = WTERMSIG(retstat);
607                 } else if (WEXITSTATUS(retstat) != 0) {
608                     what = _("code");
609                     code = WEXITSTATUS(retstat);
610                 }
611                 who = NULL;
612                 for (dumper = dmptable; dumper < dmptable + inparallel;
613                      dumper++) {
614                     if (pid == dumper->pid) {
615                         who = stralloc(dumper->name);
616                         dumper->pid = -1;
617                         break;
618                     }
619                     if (dumper->chunker && pid == dumper->chunker->pid) {
620                         who = stralloc(dumper->chunker->name);
621                         dumper->chunker->pid = -1;
622                         break;
623                     }
624                 }
625                 if (who == NULL && pid == taper_pid) {
626                     who = stralloc("taper");
627                     taper_pid = -1;
628                 }
629                 if(what != NULL && who == NULL) {
630                     who = stralloc("unknown");
631                 }
632                 if(who && what) {
633                     log_add(L_WARNING, _("%s pid %u exited with %s %d\n"), who, 
634                             (unsigned)pid, what, code);
635                     g_printf(_("driver: %s pid %u exited with %s %d\n"), who,
636                            (unsigned)pid, what, code);
637                 }
638                 amfree(who);
639             }
640         } while (pid > 0 || wait_errno == EINTR);
641         if (errno != ECHILD)
642             sleep(1);
643         if (count > 0)
644             count--;
645     } while ((errno != ECHILD) && (count != 0));
646     return (errno != ECHILD);
647 }
648
649 static void
650 kill_children(int signal)
651 {
652     dumper_t *dumper;
653
654     if(!nodump) {
655         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
656             if (!dumper->down && dumper->pid > 1) {
657                 g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
658                        dumper->name, (unsigned)dumper->pid);
659                 if (kill(dumper->pid, signal) == -1 && errno == ESRCH) {
660                     if (dumper->chunker)
661                         dumper->chunker->pid = 0;
662                 }
663                 if (dumper->chunker && dumper->chunker->pid > 1) {
664                     g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
665                            dumper->chunker->name,
666                            (unsigned)dumper->chunker->pid);
667                     if (kill(dumper->chunker->pid, signal) == -1 &&
668                         errno == ESRCH)
669                         dumper->chunker->pid = 0;
670                 }
671             }
672         }
673     }
674
675     if(taper_pid > 1) {
676         g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
677                "taper", (unsigned)taper_pid);
678         if (kill(taper_pid, signal) == -1 && errno == ESRCH)
679             taper_pid = 0;
680     }
681 }
682
683 static void
684 wait_for_children(void)
685 {
686     dumper_t *dumper;
687
688     if(!nodump) {
689         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
690             if (dumper->pid > 1 && dumper->fd >= 0) {
691                 dumper_cmd(dumper, QUIT, NULL, NULL);
692                 if (dumper->chunker && dumper->chunker->pid > 1 &&
693                     dumper->chunker->fd >= 0)
694                     chunker_cmd(dumper->chunker, QUIT, NULL, NULL);
695             }
696         }
697     }
698
699     if(taper_pid > 1 && taper > 0) {
700         taper_cmd(QUIT, NULL, NULL, 0, NULL);
701     }
702
703     if(wait_children(60) == 0)
704         return;
705
706     kill_children(SIGHUP);
707     if(wait_children(60) == 0)
708         return;
709
710     kill_children(SIGKILL);
711     if(wait_children(-1) == 0)
712         return;
713
714 }
715
716 static void
717 startaflush(void)
718 {
719     disk_t *dp = NULL;
720     disk_t *fit = NULL;
721     char *datestamp;
722     int extra_tapes = 0;
723     char *qname;
724     TapeAction result_tape_action;
725     char *why_no_new_tape;
726
727     result_tape_action = tape_action(&why_no_new_tape);
728
729     if (result_tape_action & TAPE_ACTION_NEW_TAPE) {
730         taper_state &= ~TAPER_STATE_WAIT_FOR_TAPE;
731         taper_cmd(NEW_TAPE, NULL, NULL, 0, NULL);
732     } else if (result_tape_action & TAPE_ACTION_NO_NEW_TAPE) {
733         taper_state &= ~TAPER_STATE_WAIT_FOR_TAPE;
734         taper_cmd(NO_NEW_TAPE, why_no_new_tape, NULL, 0, NULL);
735         start_degraded_mode(&runq);
736     }
737
738     if (!degraded_mode && !taper_busy && !empty(tapeq) &&
739         (result_tape_action & TAPE_ACTION_START_A_FLUSH)) {
740         
741         datestamp = sched(tapeq.head)->datestamp;
742         switch(conf_taperalgo) {
743         case ALGO_FIRST:
744                 dp = dequeue_disk(&tapeq);
745                 break;
746         case ALGO_FIRSTFIT:
747                 fit = tapeq.head;
748                 while (fit != NULL) {
749                     extra_tapes = (fit->tape_splitsize > (off_t)0) ? 
750                                         conf_runtapes - current_tape : 0;
751                     if(sched(fit)->act_size <= (tape_left +
752                              tape_length * (off_t)extra_tapes) &&
753                              strcmp(sched(fit)->datestamp, datestamp) <= 0) {
754                         dp = fit;
755                         fit = NULL;
756                     }
757                     else {
758                         fit = fit->next;
759                     }
760                 }
761                 if(dp) remove_disk(&tapeq, dp);
762                 break;
763         case ALGO_LARGEST:
764                 fit = dp = tapeq.head;
765                 while (fit != NULL) {
766                     if(sched(fit)->act_size > sched(dp)->act_size &&
767                        strcmp(sched(fit)->datestamp, datestamp) <= 0) {
768                         dp = fit;
769                     }
770                     fit = fit->next;
771                 }
772                 if(dp) remove_disk(&tapeq, dp);
773                 break;
774         case ALGO_LARGESTFIT:
775                 fit = tapeq.head;
776                 while (fit != NULL) {
777                     extra_tapes = (fit->tape_splitsize > (off_t)0) ? 
778                                         conf_runtapes - current_tape : 0;
779                     if(sched(fit)->act_size <=
780                        (tape_left + tape_length * (off_t)extra_tapes) &&
781                        (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
782                        strcmp(sched(fit)->datestamp, datestamp) <= 0) {
783                         dp = fit;
784                     }
785                     fit = fit->next;
786                 }
787                 if(dp) remove_disk(&tapeq, dp);
788                 break;
789         case ALGO_SMALLEST:
790                 break;
791         case ALGO_LAST:
792                 dp = tapeq.tail;
793                 remove_disk(&tapeq, dp);
794                 break;
795         }
796         if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
797             if(conf_taperalgo != ALGO_SMALLEST)  {
798                 g_fprintf(stderr,
799                    _("driver: startaflush: Using SMALLEST because nothing fit\n"));
800             }
801             fit = dp = tapeq.head;
802             while (fit != NULL) {
803                 if(sched(fit)->act_size < sched(dp)->act_size &&
804                    strcmp(sched(fit)->datestamp, datestamp) <= 0) {
805                     dp = fit;
806                 }
807                 fit = fit->next;
808             }
809             if(dp) remove_disk(&tapeq, dp);
810         }
811         if(taper_ev_read == NULL) {
812             taper_ev_read = event_register((event_id_t)taper, EV_READFD,
813                                            handle_taper_result, NULL);
814         }
815         if (dp) {
816             taper_disk = dp;
817             taper_busy = 1;
818             amfree(taper_input_error);
819             amfree(taper_tape_error);
820             taper_result = LAST_TOK;
821             taper_sendresult = 0;
822             taper_first_label = NULL;
823             taper_written = 0;
824             taper_state &= ~TAPER_STATE_DUMP_TO_TAPE;
825             taper_dumper = NULL;
826             qname = quote_string(dp->name);
827             taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
828                       sched(dp)->datestamp);
829             g_fprintf(stderr,_("driver: startaflush: %s %s %s %lld %lld\n"),
830                     taperalgo2str(conf_taperalgo), dp->host->hostname, qname,
831                     (long long)sched(taper_disk)->act_size,
832                     (long long)tape_left);
833             if(sched(dp)->act_size <= tape_left)
834                 tape_left -= sched(dp)->act_size;
835             else
836                 tape_left = (off_t)0;
837             amfree(qname);
838         } else {
839             error(_("FATAL: Taper marked busy and no work found."));
840             /*NOTREACHED*/
841         }
842         short_dump_state();
843     } else if(!taper_busy && taper_ev_read != NULL) {
844         event_release(taper_ev_read);
845         taper_ev_read = NULL;
846     }
847 }
848
849 static int
850 client_constrained(
851     disk_t *    dp)
852 {
853     disk_t *dp2;
854
855     /* first, check if host is too busy */
856
857     if(dp->host->inprogress >= dp->host->maxdumps) {
858         return 1;
859     }
860
861     /* next, check conflict with other dumps on same spindle */
862
863     if(dp->spindle == -1) {     /* but spindle -1 never conflicts by def. */
864         return 0;
865     }
866
867     for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
868         if(dp2->inprogress && dp2->spindle == dp->spindle) {
869             return 1;
870         }
871
872     return 0;
873 }
874
875 static void
876 start_some_dumps(
877     disklist_t *        rq)
878 {
879     int cur_idle;
880     disk_t *diskp, *delayed_diskp, *diskp_accept;
881     disk_t *dp;
882     assignedhd_t **holdp=NULL, **holdp_accept;
883     const time_t now = time(NULL);
884     cmd_t cmd;
885     int result_argc;
886     char **result_argv;
887     chunker_t *chunker;
888     dumper_t *dumper;
889     char dumptype;
890     char *dumporder;
891     int  busy_dumpers = 0;
892
893     idle_reason = IDLE_NO_DUMPERS;
894     sleep_time = 0;
895
896     if(dumpers_ev_time != NULL) {
897         event_release(dumpers_ev_time);
898         dumpers_ev_time = NULL;
899     }
900
901     for(dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
902         if( dumper->busy ) {
903             busy_dumpers++;
904         }
905     }
906
907     for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
908
909         if( dumper->busy || dumper->down) {
910             continue;
911         }
912
913         if (dumper->ev_read != NULL) {
914             event_release(dumper->ev_read);
915             dumper->ev_read = NULL;
916         }
917
918         /*
919          * A potential problem with starting from the bottom of the dump time
920          * distribution is that a slave host will have both one of the shortest
921          * and one of the longest disks, so starting its shortest disk first will
922          * tie up the host and eliminate its longest disk from consideration the
923          * first pass through.  This could cause a big delay in starting that long
924          * disk, which could drag out the whole night's dumps.
925          *
926          * While starting from the top of the dump time distribution solves the
927          * above problem, this turns out to be a bad idea, because the big dumps
928          * will almost certainly pack the holding disk completely, leaving no
929          * room for even one small dump to start.  This ends up shutting out the
930          * small-end dumpers completely (they stay idle).
931          *
932          * The introduction of multiple simultaneous dumps to one host alleviates
933          * the biggest&smallest dumps problem: both can be started at the
934          * beginning.
935          */
936
937         diskp_accept = NULL;
938         holdp_accept = NULL;
939         delayed_diskp = NULL;
940
941         cur_idle = NOT_IDLE;
942
943         dumporder = getconf_str(CNF_DUMPORDER);
944         if(strlen(dumporder) > (size_t)(dumper-dmptable)) {
945             dumptype = dumporder[dumper-dmptable];
946         }
947         else {
948             if(dumper-dmptable < 3)
949                 dumptype = 't';
950             else
951                 dumptype = 'T';
952         }
953
954         for(diskp = rq->head; diskp != NULL; diskp = diskp->next) {
955             assert(diskp->host != NULL && sched(diskp) != NULL);
956
957             if (diskp->host->start_t > now) {
958                 cur_idle = max(cur_idle, IDLE_START_WAIT);
959                 if (delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
960                     delayed_diskp = diskp;
961                     sleep_time = diskp->host->start_t;
962                 }
963             } else if(diskp->start_t > now) {
964                 cur_idle = max(cur_idle, IDLE_START_WAIT);
965                 if (delayed_diskp == NULL || sleep_time > diskp->start_t) {
966                     delayed_diskp = diskp;
967                     sleep_time = diskp->start_t;
968                 }
969             } else if (diskp->host->netif->curusage > 0 &&
970                        sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
971                 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
972             } else if(sched(diskp)->no_space) {
973                 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
974             } else if (diskp->to_holdingdisk == HOLD_NEVER) {
975                 cur_idle = max(cur_idle, IDLE_NO_HOLD);
976             } else if ((holdp =
977                 find_diskspace(sched(diskp)->est_size, &cur_idle, NULL)) == NULL) {
978                 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
979                 if (empty(tapeq) && busy_dumpers == 0) {
980                     remove_disk(rq, diskp);
981                     enqueue_disk(&directq, diskp);
982                 }
983             } else if (client_constrained(diskp)) {
984                 free_assignedhd(holdp);
985                 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
986             } else {
987
988                 /* disk fits, dump it */
989                 int accept = !diskp_accept;
990                 if(!accept) {
991                     switch(dumptype) {
992                       case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
993                                 break;
994                       case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
995                                 break;
996                       case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
997                                 break;
998                       case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
999                                 break;
1000                       case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
1001                                 break;
1002                       case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
1003                                 break;
1004                       default:  log_add(L_WARNING, _("Unknown dumporder character \'%c\', using 's'.\n"),
1005                                         dumptype);
1006                                 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
1007                                 break;
1008                     }
1009                 }
1010                 if(accept) {
1011                     if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
1012                         if(holdp_accept) free_assignedhd(holdp_accept);
1013                         diskp_accept = diskp;
1014                         holdp_accept = holdp;
1015                     }
1016                     else {
1017                         free_assignedhd(holdp);
1018                     }
1019                 }
1020                 else {
1021                     free_assignedhd(holdp);
1022                 }
1023             }
1024         }
1025
1026         diskp = diskp_accept;
1027         holdp = holdp_accept;
1028
1029         idle_reason = max(idle_reason, cur_idle);
1030
1031         /*
1032          * If we have no disk at this point, and there are disks that
1033          * are delayed, then schedule a time event to call this dumper
1034          * with the disk with the shortest delay.
1035          */
1036         if (diskp == NULL && delayed_diskp != NULL) {
1037             assert(sleep_time > now);
1038             sleep_time -= now;
1039             dumpers_ev_time = event_register((event_id_t)sleep_time, EV_TIME,
1040                 handle_dumpers_time, &runq);
1041             return;
1042         } else if (diskp != NULL) {
1043             sched(diskp)->act_size = (off_t)0;
1044             allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
1045             sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
1046             amfree(holdp);
1047             sched(diskp)->destname = newstralloc(sched(diskp)->destname,
1048                                                  sched(diskp)->holdp[0]->destname);
1049             diskp->host->inprogress++;  /* host is now busy */
1050             diskp->inprogress = 1;
1051             sched(diskp)->dumper = dumper;
1052             sched(diskp)->timestamp = now;
1053
1054             dumper->busy = 1;           /* dumper is now busy */
1055             dumper->dp = diskp;         /* link disk to dumper */
1056             remove_disk(rq, diskp);             /* take it off the run queue */
1057
1058             sched(diskp)->origsize = (off_t)-1;
1059             sched(diskp)->dumpsize = (off_t)-1;
1060             sched(diskp)->dumptime = (time_t)0;
1061             sched(diskp)->tapetime = (time_t)0;
1062             chunker = dumper->chunker;
1063             chunker->result = LAST_TOK;
1064             dumper->result = LAST_TOK;
1065             startup_chunk_process(chunker,chunker_program);
1066             chunker_cmd(chunker, START, NULL, driver_timestamp);
1067             chunker->dumper = dumper;
1068             chunker_cmd(chunker, PORT_WRITE, diskp, NULL);
1069             cmd = getresult(chunker->fd, 1, &result_argc, &result_argv);
1070             if(cmd != PORT) {
1071                 assignedhd_t **h=NULL;
1072                 int activehd;
1073                 char *qname = quote_string(diskp->name);
1074
1075                 g_printf(_("driver: did not get PORT from %s for %s:%s\n"),
1076                        chunker->name, diskp->host->hostname, qname);
1077                 amfree(qname);
1078                 fflush(stdout);
1079
1080                 deallocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
1081                 h = sched(diskp)->holdp;
1082                 activehd = sched(diskp)->activehd;
1083                 h[activehd]->used = 0;
1084                 h[activehd]->disk->allocated_dumpers--;
1085                 adjust_diskspace(diskp, DONE);
1086                 delete_diskspace(diskp);
1087                 diskp->host->inprogress--;
1088                 diskp->inprogress = 0;
1089                 sched(diskp)->dumper = NULL;
1090                 dumper->busy = 0;
1091                 dumper->dp = NULL;
1092                 sched(diskp)->dump_attempted++;
1093                 free_serial_dp(diskp);
1094                 if(sched(diskp)->dump_attempted < 2)
1095                     enqueue_disk(rq, diskp);
1096             }
1097             else {
1098                 dumper->ev_read = event_register((event_id_t)dumper->fd, EV_READFD,
1099                                                  handle_dumper_result, dumper);
1100                 chunker->ev_read = event_register((event_id_t)chunker->fd, EV_READFD,
1101                                                    handle_chunker_result, chunker);
1102                 dumper->output_port = atoi(result_argv[1]);
1103
1104                 if (diskp->host->pre_script == 0) {
1105                     for (dp=diskp->host->disks; dp != NULL; dp = dp->hostnext) {
1106                         run_server_scripts(EXECUTE_ON_PRE_HOST_BACKUP,
1107                                            get_config_name(), dp, -1);
1108                     }
1109                     diskp->host->pre_script = 1;
1110                 }
1111                 run_server_scripts(EXECUTE_ON_PRE_DLE_BACKUP,
1112                                    get_config_name(), diskp,
1113                                    sched(diskp)->level);
1114                 dumper_cmd(dumper, PORT_DUMP, diskp, NULL);
1115             }
1116             diskp->host->start_t = now + 15;
1117
1118             if (result_argv)
1119                 g_strfreev(result_argv);
1120             short_dump_state();
1121         }
1122     }
1123 }
1124
1125 /*
1126  * This gets called when a dumper is delayed for some reason.  It may
1127  * be because a disk has a delayed start, or amanda is constrained
1128  * by network or disk limits.
1129  */
1130
1131 static void
1132 handle_dumpers_time(
1133     void *      cookie)
1134 {
1135     disklist_t *runq = cookie;
1136     event_release(dumpers_ev_time);
1137     dumpers_ev_time = NULL; 
1138     start_some_dumps(runq);
1139 }
1140
1141 static void
1142 dump_schedule(
1143     disklist_t *qp,
1144     char *      str)
1145 {
1146     disk_t *dp;
1147     char *qname;
1148
1149     g_printf(_("dump of driver schedule %s:\n--------\n"), str);
1150
1151     for(dp = qp->head; dp != NULL; dp = dp->next) {
1152         qname = quote_string(dp->name);
1153         g_printf("  %-20s %-25s lv %d t %5lu s %lld p %d\n",
1154                dp->host->hostname, qname, sched(dp)->level,
1155                sched(dp)->est_time,
1156                (long long)sched(dp)->est_size, sched(dp)->priority);
1157         amfree(qname);
1158     }
1159     g_printf("--------\n");
1160 }
1161
1162 static void
1163 start_degraded_mode(
1164     /*@keep@*/ disklist_t *queuep)
1165 {
1166     disk_t *dp;
1167     disklist_t newq;
1168     off_t est_full_size;
1169     char *qname;
1170
1171     newq.head = newq.tail = 0;
1172
1173     dump_schedule(queuep, _("before start degraded mode"));
1174
1175     est_full_size = (off_t)0;
1176     while(!empty(*queuep)) {
1177         dp = dequeue_disk(queuep);
1178
1179         qname = quote_string(dp->name);
1180         if(sched(dp)->level != 0)
1181             /* go ahead and do the disk as-is */
1182             enqueue_disk(&newq, dp);
1183         else {
1184             if (reserved_space + est_full_size + sched(dp)->est_size
1185                 <= total_disksize) {
1186                 enqueue_disk(&newq, dp);
1187                 est_full_size += sched(dp)->est_size;
1188             }
1189             else if(sched(dp)->degr_level != -1) {
1190                 sched(dp)->level = sched(dp)->degr_level;
1191                 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
1192                 sched(dp)->est_nsize = sched(dp)->degr_nsize;
1193                 sched(dp)->est_csize = sched(dp)->degr_csize;
1194                 sched(dp)->est_time = sched(dp)->degr_time;
1195                 sched(dp)->est_kps  = sched(dp)->degr_kps;
1196                 enqueue_disk(&newq, dp);
1197             }
1198             else {
1199                 log_add(L_FAIL, "%s %s %s %d [%s]",
1200                         dp->host->hostname, qname, sched(dp)->datestamp,
1201                         sched(dp)->level, sched(dp)->degr_mesg);
1202             }
1203         }
1204         amfree(qname);
1205     }
1206
1207     /*@i@*/ *queuep = newq;
1208     degraded_mode = 1;
1209
1210     dump_schedule(queuep, _("after start degraded mode"));
1211 }
1212
1213
1214 static void
1215 continue_port_dumps(void)
1216 {
1217     disk_t *dp, *ndp;
1218     assignedhd_t **h;
1219     int active_dumpers=0, busy_dumpers=0, i;
1220     dumper_t *dumper;
1221
1222     /* First we try to grant diskspace to some dumps waiting for it. */
1223     for( dp = roomq.head; dp; dp = ndp ) {
1224         ndp = dp->next;
1225         /* find last holdingdisk used by this dump */
1226         for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ) {
1227             (void)h; /* Quiet lint */
1228         }
1229         /* find more space */
1230         h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1231                             &active_dumpers, h[i] );
1232         if( h ) {
1233             for(dumper = dmptable; dumper < dmptable + inparallel &&
1234                                    dumper->dp != dp; dumper++) {
1235                 (void)dp; /* Quiet lint */
1236             }
1237             assert( dumper < dmptable + inparallel );
1238             sched(dp)->activehd = assign_holdingdisk( h, dp );
1239             chunker_cmd( dumper->chunker, CONTINUE, dp, NULL );
1240             amfree(h);
1241             remove_disk( &roomq, dp );
1242         }
1243     }
1244
1245     /* So for some disks there is less holding diskspace available than
1246      * was asked for. Possible reasons are
1247      * a) diskspace has been allocated for other dumps which are
1248      *    still running or already being written to tape
1249      * b) all other dumps have been suspended due to lack of diskspace
1250      * c) this dump doesn't fit on all the holding disks
1251      * Case a) is not a problem. We just wait for the diskspace to
1252      * be freed by moving the current disk to a queue.
1253      * If case b) occurs, we have a deadlock situation. We select
1254      * a dump from the queue to be aborted and abort it. It will
1255      * be retried later dumping to disk.
1256      * If case c) is detected, the dump is aborted. Next time
1257      * it will be dumped directly to tape. Actually, case c is a special
1258      * manifestation of case b) where only one dumper is busy.
1259      */
1260     for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1261         if( dumper->busy ) {
1262             busy_dumpers++;
1263             if( !find_disk(&roomq, dumper->dp) ) {
1264                 active_dumpers++;
1265             } else if( !dp || 
1266                        sched(dp)->est_size > sched(dumper->dp)->est_size ) {
1267                 dp = dumper->dp;
1268             }
1269         }
1270     }
1271     if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) && 
1272         ((!taper_busy && empty(tapeq)) || degraded_mode) &&
1273         pending_aborts == 0 ) { /* not case a */
1274         if( busy_dumpers == 1 ) { /* case c */
1275             sched(dp)->no_space = 1;
1276         }
1277         /* case b */
1278         /* At this time, dp points to the dump with the smallest est_size.
1279          * We abort that dump, hopefully not wasting too much time retrying it.
1280          */
1281         remove_disk( &roomq, dp );
1282         chunker_cmd(sched(dp)->dumper->chunker, ABORT, NULL, _("Not enough holding disk space"));
1283         dumper_cmd( sched(dp)->dumper, ABORT, NULL, _("Not enough holding disk space"));
1284         pending_aborts++;
1285     }
1286 }
1287
1288
1289 static void
1290 handle_taper_result(
1291         void *cookie G_GNUC_UNUSED)
1292 {
1293     disk_t *dp;
1294     cmd_t cmd;
1295     int result_argc;
1296     char **result_argv;
1297     char *qname, *q;
1298     char *s;
1299
1300     assert(cookie == NULL);
1301     amfree(taper_input_error);
1302     amfree(taper_tape_error);
1303     
1304     do {
1305         
1306         short_dump_state();
1307         
1308         cmd = getresult(taper, 1, &result_argc, &result_argv);
1309         
1310         switch(cmd) {
1311             
1312         case FAILED:    /* FAILED <handle> INPUT-* TAPE-* <input err mesg> <tape err mesg> */
1313             if(result_argc != 6) {
1314                 error(_("error: [taper FAILED result_argc != 6: %d"), result_argc);
1315                 /*NOTREACHED*/
1316             }
1317             
1318             dp = serial2disk(result_argv[1]);
1319             assert(dp == taper_disk);
1320             if (!taper_dumper)
1321                 free_serial(result_argv[1]);
1322             
1323             qname = quote_string(dp->name);
1324             g_printf(_("driver: finished-cmd time %s taper wrote %s:%s\n"),
1325                    walltime_str(curclock()), dp->host->hostname, qname);
1326             fflush(stdout);
1327
1328             if (strcmp(result_argv[2], "INPUT-ERROR") == 0) {
1329                 taper_input_error = newstralloc(taper_input_error, result_argv[4]);
1330             } else if (strcmp(result_argv[2], "INPUT-GOOD") != 0) {
1331                 taper_tape_error = newstralloc(taper_tape_error,
1332                                                _("Taper protocol error"));
1333                 taper_result = FAILED;
1334                 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1335                         dp->host->hostname, qname, sched(dp)->datestamp,
1336                         sched(dp)->level, taper_tape_error);
1337                 amfree(qname);
1338                 break;
1339             }
1340             if (strcmp(result_argv[3], "TAPE-ERROR") == 0) {
1341                 taper_tape_error = newstralloc(taper_tape_error, result_argv[5]);
1342             } else if (strcmp(result_argv[3], "TAPE-GOOD") != 0) {
1343                 taper_tape_error = newstralloc(taper_tape_error,
1344                                                _("Taper protocol error"));
1345                 taper_result = FAILED;
1346                 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1347                         dp->host->hostname, qname, sched(dp)->datestamp,
1348                         sched(dp)->level, taper_tape_error);
1349                 amfree(qname);
1350                 break;
1351             }
1352
1353             amfree(qname);
1354             taper_result = cmd;
1355
1356             break;
1357             
1358         case PARTIAL:   /* PARTIAL <handle> INPUT-* TAPE-* <stat mess> <input err mesg> <tape err mesg>*/
1359         case DONE:      /* DONE <handle> INPUT-GOOD TAPE-GOOD <stat mess> <input err mesg> <tape err mesg> */
1360             if(result_argc != 7) {
1361                 error(_("error: [taper PARTIAL result_argc != 7: %d"), result_argc);
1362                 /*NOTREACHED*/
1363             }
1364             
1365             dp = serial2disk(result_argv[1]);
1366             assert(dp == taper_disk);
1367             if (!taper_dumper)
1368                 free_serial(result_argv[1]);
1369
1370             qname = quote_string(dp->name);
1371             g_printf(_("driver: finished-cmd time %s taper wrote %s:%s\n"),
1372                    walltime_str(curclock()), dp->host->hostname, qname);
1373             fflush(stdout);
1374
1375             if (strcmp(result_argv[2], "INPUT-ERROR") == 0) {
1376                 taper_input_error = newstralloc(taper_input_error, result_argv[5]);
1377             } else if (strcmp(result_argv[2], "INPUT-GOOD") != 0) {
1378                 taper_tape_error = newstralloc(taper_tape_error,
1379                                                _("Taper protocol error"));
1380                 taper_result = FAILED;
1381                 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1382                         dp->host->hostname, qname, sched(dp)->datestamp,
1383                         sched(dp)->level, taper_tape_error);
1384                 amfree(qname);
1385                 break;
1386             }
1387             if (strcmp(result_argv[3], "TAPE-ERROR") == 0) {
1388                 taper_tape_error = newstralloc(taper_tape_error, result_argv[6]);
1389             } else if (strcmp(result_argv[3], "TAPE-GOOD") != 0) {
1390                 taper_tape_error = newstralloc(taper_tape_error,
1391                                                _("Taper protocol error"));
1392                 taper_result = FAILED;
1393                 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1394                         dp->host->hostname, qname, sched(dp)->datestamp,
1395                         sched(dp)->level, taper_tape_error);
1396                 amfree(qname);
1397                 break;
1398             }
1399
1400             s = strstr(result_argv[4], " kb ");
1401             if (s) {
1402                 s += 4;
1403                 sched(dp)->dumpsize = atol(s);
1404             }
1405
1406             taper_result = cmd;
1407             amfree(qname);
1408
1409             break;
1410             
1411         case PARTDONE:  /* PARTDONE <handle> <label> <fileno> <kbytes> <stat> */
1412             dp = serial2disk(result_argv[1]);
1413             assert(dp == taper_disk);
1414             if (result_argc != 6) {
1415                 error(_("error [taper PARTDONE result_argc != 6: %d]"),
1416                       result_argc);
1417                 /*NOTREACHED*/
1418             }
1419             if (!taper_first_label) {
1420                 taper_first_label = stralloc(result_argv[2]);
1421                 taper_first_fileno = OFF_T_ATOI(result_argv[3]);
1422             }
1423             taper_written = OFF_T_ATOI(result_argv[4]);
1424             if (taper_written > sched(taper_disk)->act_size)
1425                 sched(taper_disk)->act_size = taper_written;
1426             
1427             break;
1428
1429         case REQUEST_NEW_TAPE:  /* REQUEST-NEW-TAPE <handle> */
1430             if (result_argc != 2) {
1431                 error(_("error [taper REQUEST_NEW_TAPE result_argc != 2: %d]"),
1432                       result_argc);
1433                 /*NOTREACHED*/
1434             }
1435             taper_state &= ~TAPER_STATE_TAPE_STARTED;
1436
1437             if (current_tape >= conf_runtapes) {
1438                 taper_cmd(NO_NEW_TAPE, "runtapes volumes already written", NULL, 0, NULL);
1439                 log_add(L_WARNING,
1440                         _("Out of tapes; going into degraded mode."));
1441                 start_degraded_mode(&runq);
1442             } else {
1443                 TapeAction result_tape_action;
1444                 char *why_no_new_tape;
1445
1446                 taper_state |= TAPER_STATE_WAIT_FOR_TAPE;
1447                 result_tape_action = tape_action(&why_no_new_tape);
1448                 if (result_tape_action & TAPE_ACTION_NEW_TAPE) {
1449                     taper_cmd(NEW_TAPE, NULL, NULL, 0, NULL);
1450                     taper_state &= ~TAPER_STATE_WAIT_FOR_TAPE;
1451                 } else if (result_tape_action & TAPE_ACTION_NO_NEW_TAPE) {
1452                     taper_cmd(NO_NEW_TAPE, why_no_new_tape, NULL, 0, NULL);
1453                     taper_state &= ~TAPER_STATE_WAIT_FOR_TAPE;
1454                     start_degraded_mode(&runq);
1455                 }
1456             }
1457             break;
1458
1459         case NEW_TAPE: /* NEW-TAPE <handle> <label> */
1460             if (result_argc != 3) {
1461                 error(_("error [taper NEW_TAPE result_argc != 3: %d]"),
1462                       result_argc);
1463                 /*NOTREACHED*/
1464             }
1465
1466             /* Update our tape counter and reset tape_left */
1467             current_tape++;
1468             tape_left = tape_length;
1469             taper_state |= TAPER_STATE_TAPE_STARTED;
1470             break;
1471
1472         case NO_NEW_TAPE:  /* NO-NEW-TAPE <handle> */
1473             if (result_argc != 2) {
1474                 error(_("error [taper NO_NEW_TAPE result_argc != 2: %d]"),
1475                       result_argc);
1476                 /*NOTREACHED*/
1477             }
1478             start_degraded_mode(&runq);
1479             break;
1480
1481         case DUMPER_STATUS:  /* DUMPER-STATUS <handle> */
1482             if (result_argc != 2) {
1483                 error(_("error [taper NO_NEW_TAPE result_argc != 2: %d]"),
1484                       result_argc);
1485                 /*NOTREACHED*/
1486             }
1487             if (taper_dumper->result == LAST_TOK) {
1488                 taper_sendresult = 1;
1489             } else {
1490                 if( taper_dumper->result == DONE) {
1491                     taper_cmd(DONE, NULL, NULL, 0, NULL);
1492                 } else {
1493                     taper_cmd(FAILED, NULL, NULL, 0, NULL);
1494                 }
1495             }
1496             break;
1497
1498         case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1499             dp = serial2disk(result_argv[1]);
1500             if (!taper_dumper)
1501                 free_serial(result_argv[1]);
1502             qname = quote_string(dp->name);
1503             g_printf(_("driver: finished-cmd time %s taper wrote %s:%s\n"),
1504                    walltime_str(curclock()), dp->host->hostname, qname);
1505             amfree(qname);
1506             fflush(stdout);
1507             q = quote_string(result_argv[2]);
1508             log_add(L_WARNING, _("Taper error: %s"), q);
1509             amfree(q);
1510             taper_tape_error = newstralloc(taper_tape_error, result_argv[2]);
1511             /*FALLTHROUGH*/
1512
1513         case BOGUS:
1514             if (cmd == BOGUS) {
1515                 log_add(L_WARNING, _("Taper protocol error"));
1516                 taper_tape_error = newstralloc(taper_tape_error, "BOGUS");
1517             }
1518             /*
1519              * Since we received a taper error, we can't send anything more
1520              * to the taper.  Go into degraded mode to try to get everthing
1521              * onto disk.  Later, these dumps can be flushed to a new tape.
1522              * The tape queue is zapped so that it appears empty in future
1523              * checks. If there are dumps waiting for diskspace to be freed,
1524              * cancel one.
1525              */
1526             if(!nodump) {
1527                 log_add(L_WARNING,
1528                         _("going into degraded mode because of taper component error."));
1529             }
1530             start_degraded_mode(&runq);
1531             tapeq.head = tapeq.tail = NULL;
1532             taper_busy = 0;
1533             if(taper_ev_read != NULL) {
1534                 event_release(taper_ev_read);
1535                 taper_ev_read = NULL;
1536             }
1537             if(cmd != TAPE_ERROR) aclose(taper);
1538             taper_result = cmd;
1539
1540             break;
1541
1542         default:
1543             error(_("driver received unexpected token (%s) from taper"),
1544                   cmdstr[cmd]);
1545             /*NOTREACHED*/
1546         }
1547
1548         g_strfreev(result_argv);
1549
1550         if (taper_result != LAST_TOK) {
1551             if(taper_dumper) {
1552                 if (taper_dumper->result != LAST_TOK) {
1553                     // Dumper already returned it's result
1554                     dumper_taper_result(taper_disk);
1555                 }
1556             } else {
1557                 file_taper_result(taper_disk);
1558             }
1559         }
1560         
1561     } while(areads_dataready(taper));
1562 }
1563
1564
1565 static void
1566 file_taper_result(
1567     disk_t *dp)
1568 {
1569     char *qname = quote_string(dp->name);
1570
1571     if (taper_result == DONE) {
1572         update_info_taper(dp, taper_first_label, taper_first_fileno,
1573                           sched(dp)->level);
1574     }
1575
1576     sched(dp)->taper_attempted += 1;
1577
1578     if (taper_input_error) {
1579         g_printf("driver: taper failed %s %s: %s\n",
1580                    dp->host->hostname, qname, taper_input_error);
1581         if (strcmp(sched(dp)->datestamp, driver_timestamp) == 0) {
1582             if(sched(dp)->taper_attempted >= 2) {
1583                 log_add(L_FAIL, _("%s %s %s %d [too many taper retries after holding disk error: %s]"),
1584                     dp->host->hostname, qname, sched(dp)->datestamp,
1585                     sched(dp)->level, taper_input_error);
1586                 g_printf("driver: taper failed %s %s, too many taper retry after holding disk error\n",
1587                    dp->host->hostname, qname);
1588                 amfree(sched(dp)->destname);
1589                 amfree(sched(dp)->dumpdate);
1590                 amfree(sched(dp)->degr_dumpdate);
1591                 amfree(sched(dp)->degr_mesg);
1592                 amfree(sched(dp)->datestamp);
1593                 amfree(dp->up);
1594             } else {
1595                 log_add(L_INFO, _("%s %s %s %d [Will retry dump because of holding disk error: %s]"),
1596                         dp->host->hostname, qname, sched(dp)->datestamp,
1597                         sched(dp)->level, taper_input_error);
1598                 g_printf("driver: taper will retry %s %s because of holding disk error\n",
1599                         dp->host->hostname, qname);
1600                 if (dp->to_holdingdisk != HOLD_REQUIRED) {
1601                     dp->to_holdingdisk = HOLD_NEVER;
1602                     sched(dp)->dump_attempted -= 1;
1603                     headqueue_disk(&directq, dp);
1604                 } else {
1605                     amfree(sched(dp)->destname);
1606                     amfree(sched(dp)->dumpdate);
1607                     amfree(sched(dp)->degr_dumpdate);
1608                     amfree(sched(dp)->degr_mesg);
1609                     amfree(sched(dp)->datestamp);
1610                     amfree(dp->up);
1611                 }
1612             }
1613         } else {
1614             amfree(sched(dp)->destname);
1615             amfree(sched(dp)->dumpdate);
1616             amfree(sched(dp)->degr_dumpdate);
1617             amfree(sched(dp)->degr_mesg);
1618             amfree(sched(dp)->datestamp);
1619             amfree(dp->up);
1620         }
1621     } else if (taper_tape_error) {
1622         g_printf("driver: taper failed %s %s with tape error: %s\n",
1623                    dp->host->hostname, qname, taper_tape_error);
1624         if(sched(dp)->taper_attempted >= 2) {
1625             log_add(L_FAIL, _("%s %s %s %d [too many taper retries]"),
1626                     dp->host->hostname, qname, sched(dp)->datestamp,
1627                     sched(dp)->level);
1628             g_printf("driver: taper failed %s %s, too many taper retry\n",
1629                    dp->host->hostname, qname);
1630             amfree(sched(dp)->destname);
1631             amfree(sched(dp)->dumpdate);
1632             amfree(sched(dp)->degr_dumpdate);
1633             amfree(sched(dp)->degr_mesg);
1634             amfree(sched(dp)->datestamp);
1635             amfree(dp->up);
1636         } else {
1637             g_printf("driver: taper will retry %s %s\n",
1638                    dp->host->hostname, qname);
1639             /* Re-insert into taper queue. */
1640             headqueue_disk(&tapeq, dp);
1641         }
1642     } else if (taper_result != DONE) {
1643         g_printf("driver: taper failed %s %s without error\n",
1644                    dp->host->hostname, qname);
1645     } else {
1646         delete_diskspace(dp);
1647         amfree(sched(dp)->destname);
1648         amfree(sched(dp)->dumpdate);
1649         amfree(sched(dp)->degr_dumpdate);
1650         amfree(sched(dp)->degr_mesg);
1651         amfree(sched(dp)->datestamp);
1652         amfree(dp->up);
1653     }
1654
1655     amfree(qname);
1656
1657     taper_busy = 0;
1658     amfree(taper_input_error);
1659     amfree(taper_tape_error);
1660     taper_disk = NULL;
1661             
1662     /* continue with those dumps waiting for diskspace */
1663     continue_port_dumps();
1664     start_some_dumps(&runq);
1665     startaflush();
1666 }
1667
1668 static void
1669 dumper_taper_result(
1670     disk_t *dp)
1671 {
1672     dumper_t *dumper;
1673     int is_partial;
1674     char *qname;
1675
1676     dumper = sched(dp)->dumper;
1677
1678     free_serial_dp(dp);
1679     if(dumper->result == DONE && taper_result == DONE) {
1680         update_info_dumper(dp, sched(dp)->origsize,
1681                            sched(dp)->dumpsize, sched(dp)->dumptime);
1682         update_info_taper(dp, taper_first_label, taper_first_fileno,
1683                           sched(dp)->level);
1684         qname = quote_string(dp->name); /*quote to take care of spaces*/
1685
1686         log_add(L_STATS, _("estimate %s %s %s %d [sec %ld nkb %lld ckb %lld kps %lu]"),
1687                 dp->host->hostname, qname, sched(dp)->datestamp,
1688                 sched(dp)->level,
1689                 sched(dp)->est_time, (long long)sched(dp)->est_nsize,
1690                 (long long)sched(dp)->est_csize,
1691                 sched(dp)->est_kps);
1692         amfree(qname);
1693     } else {
1694         update_failed_dump(dp);
1695     }
1696
1697     is_partial = dumper->result != DONE || taper_result != DONE;
1698
1699     sched(dp)->dump_attempted += 1;
1700     sched(dp)->taper_attempted += 1;
1701
1702     if((dumper->result != DONE || taper_result != DONE) &&
1703         sched(dp)->dump_attempted <= 1 &&
1704         sched(dp)->taper_attempted <= 1) {
1705         enqueue_disk(&directq, dp);
1706     }
1707
1708     if(dumper->ev_read != NULL) {
1709         event_release(dumper->ev_read);
1710         dumper->ev_read = NULL;
1711     }
1712     if(taper_ev_read != NULL) {
1713         event_release(taper_ev_read);
1714         taper_ev_read = NULL;
1715     }
1716     taper_busy = 0;
1717     amfree(taper_input_error);
1718     amfree(taper_tape_error);
1719     dumper->busy = 0;
1720     dp->host->inprogress -= 1;
1721     dp->inprogress = 0;
1722     deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1723 }
1724
1725
1726 static dumper_t *
1727 idle_dumper(void)
1728 {
1729     dumper_t *dumper;
1730
1731     for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1732         if(!dumper->busy && !dumper->down) return dumper;
1733
1734     return NULL;
1735 }
1736
1737 static void
1738 dumper_chunker_result(
1739     disk_t *    dp)
1740 {
1741     dumper_t *dumper;
1742     chunker_t *chunker;
1743     assignedhd_t **h=NULL;
1744     int activehd, i;
1745     off_t dummy;
1746     off_t size;
1747     int is_partial;
1748     char *qname;
1749
1750     dumper = sched(dp)->dumper;
1751     chunker = dumper->chunker;
1752
1753     free_serial_dp(dp);
1754
1755     h = sched(dp)->holdp;
1756     activehd = sched(dp)->activehd;
1757
1758     if(dumper->result == DONE && chunker->result == DONE) {
1759         update_info_dumper(dp, sched(dp)->origsize,
1760                            sched(dp)->dumpsize, sched(dp)->dumptime);
1761         qname = quote_string(dp->name);/*quote to take care of spaces*/
1762
1763         log_add(L_STATS, _("estimate %s %s %s %d [sec %ld nkb %lld ckb %lld kps %lu]"),
1764                 dp->host->hostname, qname, sched(dp)->datestamp,
1765                 sched(dp)->level,
1766                 sched(dp)->est_time, (long long)sched(dp)->est_nsize, 
1767                 (long long)sched(dp)->est_csize,
1768                 sched(dp)->est_kps);
1769         amfree(qname);
1770     } else {
1771         update_failed_dump(dp);
1772     }
1773
1774     deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1775
1776     is_partial = dumper->result != DONE || chunker->result != DONE;
1777     rename_tmp_holding(sched(dp)->destname, !is_partial);
1778
1779     dummy = (off_t)0;
1780     for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1781         dummy += h[i]->used;
1782     }
1783
1784     size = holding_file_size(sched(dp)->destname, 0);
1785     h[activehd]->used = size - dummy;
1786     h[activehd]->disk->allocated_dumpers--;
1787     adjust_diskspace(dp, DONE);
1788
1789     sched(dp)->dump_attempted += 1;
1790
1791     if((dumper->result != DONE || chunker->result != DONE) &&
1792        sched(dp)->dump_attempted <= 1) {
1793         delete_diskspace(dp);
1794         if (sched(dp)->no_space) {
1795             enqueue_disk(&directq, dp);
1796         } else {
1797             enqueue_disk(&runq, dp);
1798         }
1799     }
1800     else if(size > (off_t)DISK_BLOCK_KB) {
1801         enqueue_disk(&tapeq, dp);
1802     }
1803     else {
1804         delete_diskspace(dp);
1805     }
1806
1807     dumper->busy = 0;
1808     dp->host->inprogress -= 1;
1809     dp->inprogress = 0;
1810
1811     waitpid(chunker->pid, NULL, 0 );
1812     aclose(chunker->fd);
1813     chunker->fd = -1;
1814     chunker->down = 1;
1815     
1816     dp = NULL;
1817     if (chunker->result == ABORT_FINISHED)
1818         pending_aborts--;
1819     continue_port_dumps();
1820     /*
1821      * Wakeup any dumpers that are sleeping because of network
1822      * or disk constraints.
1823      */
1824     start_some_dumps(&runq);
1825     startaflush();
1826 }
1827
1828
1829 static void
1830 handle_dumper_result(
1831         void * cookie)
1832 {
1833     /* uses global pending_aborts */
1834     dumper_t *dumper = cookie;
1835     disk_t *dp, *sdp, *dp1;
1836     cmd_t cmd;
1837     int result_argc;
1838     char *qname;
1839     char **result_argv;
1840
1841     assert(dumper != NULL);
1842     dp = dumper->dp;
1843     assert(dp != NULL);
1844     assert(sched(dp) != NULL);
1845     do {
1846
1847         short_dump_state();
1848
1849         cmd = getresult(dumper->fd, 1, &result_argc, &result_argv);
1850
1851         if(cmd != BOGUS) {
1852             /* result_argv[1] always contains the serial number */
1853             sdp = serial2disk(result_argv[1]);
1854             if (sdp != dp) {
1855                 error(_("Invalid serial number %s"), result_argv[1]);
1856                 g_assert_not_reached();
1857             }
1858         }
1859
1860         qname = quote_string(dp->name);
1861         switch(cmd) {
1862
1863         case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
1864             if(result_argc != 6) {
1865                 error(_("error [dumper DONE result_argc != 6: %d]"), result_argc);
1866                 /*NOTREACHED*/
1867             }
1868
1869             sched(dp)->origsize = OFF_T_ATOI(result_argv[2]);
1870             sched(dp)->dumptime = TIME_T_ATOI(result_argv[4]);
1871
1872             g_printf(_("driver: finished-cmd time %s %s dumped %s:%s\n"),
1873                    walltime_str(curclock()), dumper->name,
1874                    dp->host->hostname, qname);
1875             fflush(stdout);
1876
1877             dumper->result = cmd;
1878
1879             break;
1880
1881         case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1882             /*
1883              * Requeue this disk, and fall through to the FAILED
1884              * case for cleanup.
1885              */
1886             if(sched(dp)->dump_attempted) {
1887                 char *qname = quote_string(dp->name);
1888                 char *qerr = quote_string(result_argv[2]);
1889                 log_add(L_FAIL, _("%s %s %s %d [too many dumper retry: %s]"),
1890                     dp->host->hostname, qname, sched(dp)->datestamp,
1891                     sched(dp)->level, qerr);
1892                 g_printf(_("driver: dump failed %s %s %s, too many dumper retry: %s\n"),
1893                         result_argv[1], dp->host->hostname, qname, qerr);
1894                 amfree(qname);
1895                 amfree(qerr);
1896             }
1897             /* FALLTHROUGH */
1898         case FAILED: /* FAILED <handle> <errstr> */
1899             /*free_serial(result_argv[1]);*/
1900             dumper->result = cmd;
1901             break;
1902
1903         case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1904             /*
1905              * We sent an ABORT from the NO-ROOM case because this dump
1906              * wasn't going to fit onto the holding disk.  We now need to
1907              * clean up the remains of this image, and try to finish
1908              * other dumps that are waiting on disk space.
1909              */
1910             assert(pending_aborts);
1911             /*free_serial(result_argv[1]);*/
1912             dumper->result = cmd;
1913             break;
1914
1915         case BOGUS:
1916             /* either EOF or garbage from dumper.  Turn it off */
1917             log_add(L_WARNING, _("%s pid %ld is messed up, ignoring it.\n"),
1918                     dumper->name, (long)dumper->pid);
1919             if (dumper->ev_read) {
1920                 event_release(dumper->ev_read);
1921                 dumper->ev_read = NULL;
1922             }
1923             aclose(dumper->fd);
1924             dumper->busy = 0;
1925             dumper->down = 1;   /* mark it down so it isn't used again */
1926
1927             /* if it was dumping something, zap it and try again */
1928             if(sched(dp)->dump_attempted) {
1929                 log_add(L_FAIL, _("%s %s %s %d [%s died]"),
1930                         dp->host->hostname, qname, sched(dp)->datestamp,
1931                         sched(dp)->level, dumper->name);
1932             } else {
1933                 log_add(L_WARNING, _("%s died while dumping %s:%s lev %d."),
1934                         dumper->name, dp->host->hostname, qname,
1935                         sched(dp)->level);
1936             }
1937             dumper->result = cmd;
1938             break;
1939
1940         default:
1941             assert(0);
1942         }
1943         amfree(qname);
1944         g_strfreev(result_argv);
1945
1946         if (cmd != BOGUS) {
1947             int last_dump = 1;
1948             run_server_scripts(EXECUTE_ON_POST_DLE_BACKUP,
1949                                get_config_name(), dp, sched(dp)->level);
1950             for (dp1=runq.head; dp1 != NULL; dp1 = dp1->next) {
1951                 if (dp1 != dp) last_dump = 0;
1952             }
1953             if (last_dump && dp->host->post_script == 0) {
1954                 if (dp->host->post_script == 0) {
1955                     for (dp1=dp->host->disks; dp1 != NULL; dp1 = dp1->hostnext) {
1956                         run_server_scripts(EXECUTE_ON_POST_HOST_BACKUP,
1957                                            get_config_name(), dp1, -1);
1958                     }
1959                     dp->host->post_script = 1;
1960                 }
1961             }
1962         }
1963
1964         /* send the dumper result to the chunker */
1965         if (dumper->chunker) {
1966             if (dumper->chunker->down == 0 && dumper->chunker->fd != -1 &&
1967                 dumper->chunker->result == LAST_TOK) {
1968                 if (cmd == DONE) {
1969                     chunker_cmd(dumper->chunker, DONE, dp, NULL);
1970                 }
1971                 else {
1972                     chunker_cmd(dumper->chunker, FAILED, dp, NULL);
1973                 }
1974             }
1975             if( dumper->result != LAST_TOK &&
1976                 dumper->chunker->result != LAST_TOK)
1977                 dumper_chunker_result(dp);
1978         } else { /* send the dumper result to the taper */
1979             if (taper_sendresult) {
1980                 if (cmd == DONE) {
1981                     taper_cmd(DONE, driver_timestamp, NULL, 0, NULL);
1982                 } else {
1983                     taper_cmd(FAILED, driver_timestamp, NULL, 0, NULL);
1984                 }
1985                 taper_sendresult = 0;
1986             }
1987         }
1988         if (taper_dumper && taper_result != LAST_TOK) {
1989             dumper_taper_result(dp);
1990         }
1991     } while(areads_dataready(dumper->fd));
1992 }
1993
1994
1995 static void
1996 handle_chunker_result(
1997     void *      cookie)
1998 {
1999     chunker_t *chunker = cookie;
2000     assignedhd_t **h=NULL;
2001     dumper_t *dumper;
2002     disk_t *dp, *sdp;
2003     cmd_t cmd;
2004     int result_argc;
2005     char **result_argv;
2006     int dummy;
2007     int activehd = -1;
2008     char *qname;
2009
2010     assert(chunker != NULL);
2011     dumper = chunker->dumper;
2012     assert(dumper != NULL);
2013     dp = dumper->dp;
2014     assert(dp != NULL);
2015     assert(sched(dp) != NULL);
2016     assert(sched(dp)->destname != NULL);
2017     assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
2018
2019     if(sched(dp)->holdp) {
2020         h = sched(dp)->holdp;
2021         activehd = sched(dp)->activehd;
2022     }
2023
2024     do {
2025         short_dump_state();
2026
2027         cmd = getresult(chunker->fd, 1, &result_argc, &result_argv);
2028
2029         if(cmd != BOGUS) {
2030             /* result_argv[1] always contains the serial number */
2031             sdp = serial2disk(result_argv[1]);
2032             if (sdp != dp) {
2033                 error(_("Invalid serial number %s"), result_argv[1]);
2034                 g_assert_not_reached();
2035             }
2036         }
2037
2038         switch(cmd) {
2039
2040         case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
2041         case DONE: /* DONE <handle> <dumpsize> <errstr> */
2042             if(result_argc != 4) {
2043                 error(_("error [chunker %s result_argc != 4: %d]"), cmdstr[cmd],
2044                       result_argc);
2045                 /*NOTREACHED*/
2046             }
2047             /*free_serial(result_argv[1]);*/
2048
2049             sched(dp)->dumpsize = (off_t)atof(result_argv[2]);
2050
2051             qname = quote_string(dp->name);
2052             g_printf(_("driver: finished-cmd time %s %s chunked %s:%s\n"),
2053                    walltime_str(curclock()), chunker->name,
2054                    dp->host->hostname, qname);
2055             fflush(stdout);
2056             amfree(qname);
2057
2058             event_release(chunker->ev_read);
2059
2060             chunker->result = cmd;
2061
2062             break;
2063
2064         case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2065             event_release(chunker->ev_read);
2066
2067             chunker->result = cmd;
2068
2069             break;
2070         case FAILED: /* FAILED <handle> <errstr> */
2071             /*free_serial(result_argv[1]);*/
2072
2073             event_release(chunker->ev_read);
2074
2075             chunker->result = cmd;
2076
2077             break;
2078
2079         case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
2080             if (!h || activehd < 0) { /* should never happen */
2081                 error(_("!h || activehd < 0"));
2082                 /*NOTREACHED*/
2083             }
2084             h[activehd]->used -= OFF_T_ATOI(result_argv[2]);
2085             h[activehd]->reserved -= OFF_T_ATOI(result_argv[2]);
2086             h[activehd]->disk->allocated_space -= OFF_T_ATOI(result_argv[2]);
2087             h[activehd]->disk->disksize -= OFF_T_ATOI(result_argv[2]);
2088             break;
2089
2090         case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
2091             if (!h || activehd < 0) { /* should never happen */
2092                 error(_("!h || activehd < 0"));
2093                 /*NOTREACHED*/
2094             }
2095             h[activehd]->disk->allocated_dumpers--;
2096             h[activehd]->used = h[activehd]->reserved;
2097             if( h[++activehd] ) { /* There's still some allocated space left.
2098                                    * Tell the dumper about it. */
2099                 sched(dp)->activehd++;
2100                 chunker_cmd( chunker, CONTINUE, dp, NULL );
2101             } else { /* !h[++activehd] - must allocate more space */
2102                 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
2103                 sched(dp)->est_size = (sched(dp)->act_size/(off_t)20) * (off_t)21; /* +5% */
2104                 sched(dp)->est_size = am_round(sched(dp)->est_size, (off_t)DISK_BLOCK_KB);
2105                 if (sched(dp)->est_size < sched(dp)->act_size + 2*DISK_BLOCK_KB)
2106                     sched(dp)->est_size += 2 * DISK_BLOCK_KB;
2107                 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
2108                                     &dummy,
2109                                     h[activehd-1] );
2110                 if( !h ) {
2111                     /* No diskspace available. The reason for this will be
2112                      * determined in continue_port_dumps(). */
2113                     enqueue_disk( &roomq, dp );
2114                     continue_port_dumps();
2115                 } else {
2116                     /* OK, allocate space for disk and have chunker continue */
2117                     sched(dp)->activehd = assign_holdingdisk( h, dp );
2118                     chunker_cmd( chunker, CONTINUE, dp, NULL );
2119                     amfree(h);
2120                 }
2121             }
2122             break;
2123
2124         case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
2125             /*
2126              * We sent an ABORT from the NO-ROOM case because this dump
2127              * wasn't going to fit onto the holding disk.  We now need to
2128              * clean up the remains of this image, and try to finish
2129              * other dumps that are waiting on disk space.
2130              */
2131             /*assert(pending_aborts);*/
2132
2133             /*free_serial(result_argv[1]);*/
2134
2135             event_release(chunker->ev_read);
2136
2137             chunker->result = cmd;
2138
2139             break;
2140
2141         case BOGUS:
2142             /* either EOF or garbage from chunker.  Turn it off */
2143             log_add(L_WARNING, _("%s pid %ld is messed up, ignoring it.\n"),
2144                     chunker->name, (long)chunker->pid);
2145
2146             /* if it was dumping something, zap it and try again */
2147             g_assert(h && activehd >= 0);
2148             qname = quote_string(dp->name);
2149             if(sched(dp)->dump_attempted) {
2150                 log_add(L_FAIL, _("%s %s %s %d [%s died]"),
2151                         dp->host->hostname, qname, sched(dp)->datestamp,
2152                         sched(dp)->level, chunker->name);
2153             } else {
2154                 log_add(L_WARNING, _("%s died while dumping %s:%s lev %d."),
2155                         chunker->name, dp->host->hostname, qname,
2156                         sched(dp)->level);
2157             }
2158             amfree(qname);
2159             dp = NULL;
2160
2161             event_release(chunker->ev_read);
2162
2163             chunker->result = cmd;
2164
2165             break;
2166
2167         default:
2168             assert(0);
2169         }
2170         g_strfreev(result_argv);
2171
2172         if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
2173             dumper_chunker_result(dp);
2174
2175     } while(areads_dataready(chunker->fd));
2176 }
2177
2178
2179 static disklist_t
2180 read_flush(void)
2181 {
2182     sched_t *sp;
2183     disk_t *dp;
2184     int line;
2185     char *hostname, *diskname, *datestamp;
2186     int level;
2187     char *destname;
2188     disk_t *dp1;
2189     char *inpline = NULL;
2190     char *command;
2191     char *s;
2192     int ch;
2193     disklist_t tq;
2194     char *qname = NULL;
2195     char *qdestname = NULL;
2196
2197     tq.head = tq.tail = NULL;
2198
2199     for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
2200         dumpfile_t file;
2201
2202         line++;
2203         if (inpline[0] == '\0')
2204             continue;
2205
2206         s = inpline;
2207         ch = *s++;
2208
2209         skip_whitespace(s, ch);                 /* find the command */
2210         if(ch == '\0') {
2211             error(_("flush line %d: syntax error (no command)"), line);
2212             /*NOTREACHED*/
2213         }
2214         command = s - 1;
2215         skip_non_whitespace(s, ch);
2216         s[-1] = '\0';
2217
2218         if(strcmp(command,"ENDFLUSH") == 0) {
2219             break;
2220         }
2221
2222         if(strcmp(command,"FLUSH") != 0) {
2223             error(_("flush line %d: syntax error (%s != FLUSH)"), line, command);
2224             /*NOTREACHED*/
2225         }
2226
2227         skip_whitespace(s, ch);                 /* find the hostname */
2228         if(ch == '\0') {
2229             error(_("flush line %d: syntax error (no hostname)"), line);
2230             /*NOTREACHED*/
2231         }
2232         hostname = s - 1;
2233         skip_non_whitespace(s, ch);
2234         s[-1] = '\0';
2235
2236         skip_whitespace(s, ch);                 /* find the diskname */
2237         if(ch == '\0') {
2238             error(_("flush line %d: syntax error (no diskname)"), line);
2239             /*NOTREACHED*/
2240         }
2241         qname = s - 1;
2242         skip_quoted_string(s, ch);
2243         s[-1] = '\0';                           /* terminate the disk name */
2244         diskname = unquote_string(qname);
2245
2246         skip_whitespace(s, ch);                 /* find the datestamp */
2247         if(ch == '\0') {
2248             error(_("flush line %d: syntax error (no datestamp)"), line);
2249             /*NOTREACHED*/
2250         }
2251         datestamp = s - 1;
2252         skip_non_whitespace(s, ch);
2253         s[-1] = '\0';
2254
2255         skip_whitespace(s, ch);                 /* find the level number */
2256         if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2257             error(_("flush line %d: syntax error (bad level)"), line);
2258             /*NOTREACHED*/
2259         }
2260         skip_integer(s, ch);
2261
2262         skip_whitespace(s, ch);                 /* find the filename */
2263         if(ch == '\0') {
2264             error(_("flush line %d: syntax error (no filename)"), line);
2265             /*NOTREACHED*/
2266         }
2267         qdestname = s - 1;
2268         skip_quoted_string(s, ch);
2269         s[-1] = '\0';
2270         destname = unquote_string(qdestname);
2271
2272         holding_file_get_dumpfile(destname, &file);
2273         if( file.type != F_DUMPFILE) {
2274             if( file.type != F_CONT_DUMPFILE )
2275                 log_add(L_INFO, _("%s: ignoring cruft file."), destname);
2276             amfree(diskname);
2277             amfree(destname);
2278             dumpfile_free_data(&file);
2279             continue;
2280         }
2281
2282         if(strcmp(hostname, file.name) != 0 ||
2283            strcmp(diskname, file.disk) != 0 ||
2284            strcmp(datestamp, file.datestamp) != 0) {
2285             log_add(L_INFO, _("disk %s:%s not consistent with file %s"),
2286                     hostname, diskname, destname);
2287             amfree(diskname);
2288             amfree(destname);
2289             dumpfile_free_data(&file);
2290             continue;
2291         }
2292         amfree(diskname);
2293
2294         dp = lookup_disk(file.name, file.disk);
2295
2296         if (dp == NULL) {
2297             log_add(L_INFO, _("%s: disk %s:%s not in database, skipping it."),
2298                     destname, file.name, file.disk);
2299             amfree(destname);
2300             dumpfile_free_data(&file);
2301             continue;
2302         }
2303
2304         if(file.dumplevel < 0 || file.dumplevel > 9) {
2305             log_add(L_INFO, _("%s: ignoring file with bogus dump level %d."),
2306                     destname, file.dumplevel);
2307             amfree(destname);
2308             dumpfile_free_data(&file);
2309             continue;
2310         }
2311
2312         if (holding_file_size(destname,1) <= 0) {
2313             log_add(L_INFO, "%s: removing file with no data.", destname);
2314             holding_file_unlink(destname);
2315             amfree(destname);
2316             dumpfile_free_data(&file);
2317             continue;
2318         }
2319
2320         dp1 = (disk_t *)alloc(SIZEOF(disk_t));
2321         *dp1 = *dp;
2322         dp1->next = dp1->prev = NULL;
2323
2324         /* add it to the flushhost list */
2325         if(!flushhost) {
2326             flushhost = alloc(SIZEOF(am_host_t));
2327             flushhost->next = NULL;
2328             flushhost->hostname = stralloc("FLUSHHOST");
2329             flushhost->up = NULL;
2330             flushhost->features = NULL;
2331         }
2332         dp1->hostnext = flushhost->disks;
2333         flushhost->disks = dp1;
2334
2335         sp = (sched_t *) alloc(SIZEOF(sched_t));
2336         sp->destname = destname;
2337         sp->level = file.dumplevel;
2338         sp->dumpdate = NULL;
2339         sp->degr_dumpdate = NULL;
2340         sp->degr_mesg = NULL;
2341         sp->datestamp = stralloc(file.datestamp);
2342         sp->est_nsize = (off_t)0;
2343         sp->est_csize = (off_t)0;
2344         sp->est_time = 0;
2345         sp->est_kps = 10;
2346         sp->priority = 0;
2347         sp->degr_level = -1;
2348         sp->dump_attempted = 0;
2349         sp->taper_attempted = 0;
2350         sp->act_size = holding_file_size(destname, 0);
2351         sp->holdp = build_diskspace(destname);
2352         if(sp->holdp == NULL) continue;
2353         sp->dumper = NULL;
2354         sp->timestamp = (time_t)0;
2355
2356         dp1->up = (char *)sp;
2357
2358         enqueue_disk(&tq, dp1);
2359         dumpfile_free_data(&file);
2360     }
2361     amfree(inpline);
2362
2363     /*@i@*/ return tq;
2364 }
2365
2366 static void
2367 read_schedule(
2368     void *      cookie)
2369 {
2370     sched_t *sp;
2371     disk_t *dp;
2372     int level, line, priority;
2373     char *dumpdate, *degr_dumpdate, *degr_mesg;
2374     int degr_level;
2375     time_t time, degr_time;
2376     time_t *time_p = &time;
2377     time_t *degr_time_p = &degr_time;
2378     off_t nsize, csize, degr_nsize, degr_csize;
2379     unsigned long kps, degr_kps;
2380     char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
2381     char *command;
2382     char *s;
2383     int ch;
2384     off_t flush_size = (off_t)0;
2385     char *qname = NULL;
2386     long long time_;
2387     long long nsize_;
2388     long long csize_;
2389     long long degr_nsize_;
2390     long long degr_csize_;
2391
2392     (void)cookie;       /* Quiet unused parameter warning */
2393
2394     event_release(schedule_ev_read);
2395
2396     /* read schedule from stdin */
2397
2398     for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
2399         if (inpline[0] == '\0')
2400             continue;
2401         line++;
2402
2403         s = inpline;
2404         ch = *s++;
2405
2406         skip_whitespace(s, ch);                 /* find the command */
2407         if(ch == '\0') {
2408             error(_("schedule line %d: syntax error (no command)"), line);
2409             /*NOTREACHED*/
2410         }
2411         command = s - 1;
2412         skip_non_whitespace(s, ch);
2413         s[-1] = '\0';
2414
2415         if(strcmp(command,"DUMP") != 0) {
2416             error(_("schedule line %d: syntax error (%s != DUMP)"), line, command);
2417             /*NOTREACHED*/
2418         }
2419
2420         skip_whitespace(s, ch);                 /* find the host name */
2421         if(ch == '\0') {
2422             error(_("schedule line %d: syntax error (no host name)"), line);
2423             /*NOTREACHED*/
2424         }
2425         hostname = s - 1;
2426         skip_non_whitespace(s, ch);
2427         s[-1] = '\0';
2428
2429         skip_whitespace(s, ch);                 /* find the feature list */
2430         if(ch == '\0') {
2431             error(_("schedule line %d: syntax error (no feature list)"), line);
2432             /*NOTREACHED*/
2433         }
2434         features = s - 1;
2435         skip_non_whitespace(s, ch);
2436         s[-1] = '\0';
2437
2438         skip_whitespace(s, ch);                 /* find the disk name */
2439         if(ch == '\0') {
2440             error(_("schedule line %d: syntax error (no disk name)"), line);
2441             /*NOTREACHED*/
2442         }
2443         qname = s - 1;
2444         skip_quoted_string(s, ch);
2445         s[-1] = '\0';                           /* terminate the disk name */
2446         diskname = unquote_string(qname);
2447
2448         skip_whitespace(s, ch);                 /* find the datestamp */
2449         if(ch == '\0') {
2450             error(_("schedule line %d: syntax error (no datestamp)"), line);
2451             /*NOTREACHED*/
2452         }
2453         datestamp = s - 1;
2454         skip_non_whitespace(s, ch);
2455         s[-1] = '\0';
2456
2457         skip_whitespace(s, ch);                 /* find the priority number */
2458         if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
2459             error(_("schedule line %d: syntax error (bad priority)"), line);
2460             /*NOTREACHED*/
2461         }
2462         skip_integer(s, ch);
2463
2464         skip_whitespace(s, ch);                 /* find the level number */
2465         if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2466             error(_("schedule line %d: syntax error (bad level)"), line);
2467             /*NOTREACHED*/
2468         }
2469         skip_integer(s, ch);
2470
2471         skip_whitespace(s, ch);                 /* find the dump date */
2472         if(ch == '\0') {
2473             error(_("schedule line %d: syntax error (bad dump date)"), line);
2474             /*NOTREACHED*/
2475         }
2476         dumpdate = s - 1;
2477         skip_non_whitespace(s, ch);
2478         s[-1] = '\0';
2479
2480         skip_whitespace(s, ch);                 /* find the native size */
2481         nsize_ = (off_t)0;
2482         if(ch == '\0' || sscanf(s - 1, "%lld", &nsize_) != 1) {
2483             error(_("schedule line %d: syntax error (bad nsize)"), line);
2484             /*NOTREACHED*/
2485         }
2486         nsize = (off_t)nsize_;
2487         skip_integer(s, ch);
2488
2489         skip_whitespace(s, ch);                 /* find the compressed size */
2490         csize_ = (off_t)0;
2491         if(ch == '\0' || sscanf(s - 1, "%lld", &csize_) != 1) {
2492             error(_("schedule line %d: syntax error (bad csize)"), line);
2493             /*NOTREACHED*/
2494         }
2495         csize = (off_t)csize_;
2496         skip_integer(s, ch);
2497
2498         skip_whitespace(s, ch);                 /* find the time number */
2499         if(ch == '\0' || sscanf(s - 1, "%lld", &time_) != 1) {
2500             error(_("schedule line %d: syntax error (bad estimated time)"), line);
2501             /*NOTREACHED*/
2502         }
2503         *time_p = (time_t)time_;
2504         skip_integer(s, ch);
2505
2506         skip_whitespace(s, ch);                 /* find the kps number */
2507         if(ch == '\0' || sscanf(s - 1, "%lu", &kps) != 1) {
2508             error(_("schedule line %d: syntax error (bad kps)"), line);
2509             continue;
2510         }
2511         skip_integer(s, ch);
2512
2513         degr_dumpdate = NULL;                   /* flag if degr fields found */
2514         skip_whitespace(s, ch);                 /* find the degr level number */
2515         degr_mesg = NULL;
2516         if (ch == '"') {
2517             qname = s - 1;
2518             skip_quoted_string(s, ch);
2519             s[-1] = '\0';                       /* terminate degr mesg */
2520             degr_mesg = unquote_string(qname);
2521             degr_level = -1;
2522             degr_nsize = (off_t)0;
2523             degr_csize = (off_t)0;
2524             degr_time = (time_t)0;
2525             degr_kps = 0;
2526         } else if (ch != '\0') {
2527             if(sscanf(s - 1, "%d", &degr_level) != 1) {
2528                 error(_("schedule line %d: syntax error (bad degr level)"), line);
2529                 /*NOTREACHED*/
2530             }
2531             skip_integer(s, ch);
2532
2533             skip_whitespace(s, ch);             /* find the degr dump date */
2534             if(ch == '\0') {
2535                 error(_("schedule line %d: syntax error (bad degr dump date)"), line);
2536                 /*NOTREACHED*/
2537             }
2538             degr_dumpdate = s - 1;
2539             skip_non_whitespace(s, ch);
2540             s[-1] = '\0';
2541
2542             skip_whitespace(s, ch);             /* find the degr native size */
2543             degr_nsize_ = (off_t)0;
2544             if(ch == '\0'  || sscanf(s - 1, "%lld", &degr_nsize_) != 1) {
2545                 error(_("schedule line %d: syntax error (bad degr nsize)"), line);
2546                 /*NOTREACHED*/
2547             }
2548             degr_nsize = (off_t)degr_nsize_;
2549             skip_integer(s, ch);
2550
2551             skip_whitespace(s, ch);             /* find the degr compressed size */
2552             degr_csize_ = (off_t)0;
2553             if(ch == '\0'  || sscanf(s - 1, "%lld", &degr_csize_) != 1) {
2554                 error(_("schedule line %d: syntax error (bad degr csize)"), line);
2555                 /*NOTREACHED*/
2556             }
2557             degr_csize = (off_t)degr_csize_;
2558             skip_integer(s, ch);
2559
2560             skip_whitespace(s, ch);             /* find the degr time number */
2561             if(ch == '\0' || sscanf(s - 1, "%lld", &time_) != 1) {
2562                 error(_("schedule line %d: syntax error (bad degr estimated time)"), line);
2563                 /*NOTREACHED*/
2564             }
2565             *degr_time_p = (time_t)time_;
2566             skip_integer(s, ch);
2567
2568             skip_whitespace(s, ch);             /* find the degr kps number */
2569             if(ch == '\0' || sscanf(s - 1, "%lu", &degr_kps) != 1) {
2570                 error(_("schedule line %d: syntax error (bad degr kps)"), line);
2571                 /*NOTREACHED*/
2572             }
2573             skip_integer(s, ch);
2574         } else {
2575             error(_("schedule line %d: no degraded estimate or message"), line);
2576         }
2577
2578         dp = lookup_disk(hostname, diskname);
2579         if(dp == NULL) {
2580             log_add(L_WARNING,
2581                     _("schedule line %d: %s:'%s' not in disklist, ignored"),
2582                     line, hostname, qname);
2583             amfree(diskname);
2584             continue;
2585         }
2586
2587         sp = (sched_t *) alloc(SIZEOF(sched_t));
2588         /*@ignore@*/
2589         sp->level = level;
2590         sp->dumpdate = stralloc(dumpdate);
2591         sp->est_nsize = DISK_BLOCK_KB + nsize; /* include header */
2592         sp->est_csize = DISK_BLOCK_KB + csize; /* include header */
2593         /* round estimate to next multiple of DISK_BLOCK_KB */
2594         sp->est_csize = am_round(sp->est_csize, DISK_BLOCK_KB);
2595         sp->est_size = sp->est_csize;
2596         sp->est_time = time;
2597         sp->est_kps = kps;
2598         sp->priority = priority;
2599         sp->datestamp = stralloc(datestamp);
2600
2601         if(degr_dumpdate) {
2602             sp->degr_level = degr_level;
2603             sp->degr_dumpdate = stralloc(degr_dumpdate);
2604             sp->degr_nsize = DISK_BLOCK_KB + degr_nsize;
2605             sp->degr_csize = DISK_BLOCK_KB + degr_csize;
2606             /* round estimate to next multiple of DISK_BLOCK_KB */
2607             sp->degr_csize = am_round(sp->degr_csize, DISK_BLOCK_KB);
2608             sp->degr_time = degr_time;
2609             sp->degr_kps = degr_kps;
2610             sp->degr_mesg = NULL;
2611         } else {
2612             sp->degr_level = -1;
2613             sp->degr_dumpdate = NULL;
2614             sp->degr_mesg = degr_mesg;
2615         }
2616         /*@end@*/
2617
2618         sp->dump_attempted = 0;
2619         sp->taper_attempted = 0;
2620         sp->act_size = 0;
2621         sp->holdp = NULL;
2622         sp->activehd = -1;
2623         sp->dumper = NULL;
2624         sp->timestamp = (time_t)0;
2625         sp->destname = NULL;
2626         sp->no_space = 0;
2627
2628         dp->up = (char *) sp;
2629         if(dp->host->features == NULL) {
2630             dp->host->features = am_string_to_feature(features);
2631             if (!dp->host->features) {
2632                 log_add(L_WARNING,
2633                     _("Invalid feature string from client '%s'"),
2634                     features);
2635                 dp->host->features = am_set_default_feature_set();
2636             }
2637         }
2638         remove_disk(&waitq, dp);
2639         if (dp->to_holdingdisk == HOLD_NEVER) {
2640             enqueue_disk(&directq, dp);
2641         } else {
2642             enqueue_disk(&runq, dp);
2643         }
2644         flush_size += sp->act_size;
2645         amfree(diskname);
2646     }
2647     g_printf(_("driver: flush size %lld\n"), (long long)flush_size);
2648     amfree(inpline);
2649     if(line == 0)
2650         log_add(L_WARNING, _("WARNING: got empty schedule from planner"));
2651     if(need_degraded==1) start_degraded_mode(&runq);
2652     schedule_done = 1;
2653     start_some_dumps(&runq);
2654 }
2655
2656 static unsigned long
2657 free_kps(
2658     netif_t *ip)
2659 {
2660     unsigned long res;
2661
2662     if (ip == NULL) {
2663         netif_t *p;
2664         unsigned long maxusage=0;
2665         unsigned long curusage=0;
2666         for(p = disklist_netifs(); p != NULL; p = p->next) {
2667             maxusage += interface_get_maxusage(p->config);
2668             curusage += p->curusage;
2669         }
2670         if (maxusage >= curusage)
2671             res = maxusage - curusage;
2672         else
2673             res = 0;
2674 #ifndef __lint
2675     } else {
2676         if ((unsigned long)interface_get_maxusage(ip->config) >= ip->curusage)
2677             res = interface_get_maxusage(ip->config) - ip->curusage;
2678         else
2679             res = 0;
2680 #endif
2681     }
2682
2683     return res;
2684 }
2685
2686 static void
2687 interface_state(
2688     char *time_str)
2689 {
2690     netif_t *ip;
2691
2692     g_printf(_("driver: interface-state time %s"), time_str);
2693
2694     for(ip = disklist_netifs(); ip != NULL; ip = ip->next) {
2695         g_printf(_(" if %s: free %lu"), interface_name(ip->config), free_kps(ip));
2696     }
2697     g_printf("\n");
2698 }
2699
2700 static void
2701 allocate_bandwidth(
2702     netif_t *           ip,
2703     unsigned long       kps)
2704 {
2705     ip->curusage += kps;
2706 }
2707
2708 static void
2709 deallocate_bandwidth(
2710     netif_t *           ip,
2711     unsigned long       kps)
2712 {
2713     assert(kps <= ip->curusage);
2714     ip->curusage -= kps;
2715 }
2716
2717 /* ------------ */
2718 static off_t
2719 free_space(void)
2720 {
2721     holdalloc_t *ha;
2722     off_t total_free;
2723     off_t diff;
2724
2725     total_free = (off_t)0;
2726     for(ha = holdalloc; ha != NULL; ha = ha->next) {
2727         diff = ha->disksize - ha->allocated_space;
2728         if(diff > (off_t)0)
2729             total_free += diff;
2730     }
2731     return total_free;
2732 }
2733
2734 /*
2735  * We return an array of pointers to assignedhd_t. The array contains at
2736  * most one entry per holding disk. The list of pointers is terminated by
2737  * a NULL pointer. Each entry contains a pointer to a holdingdisk and
2738  * how much diskspace to use on that disk. Later on, assign_holdingdisk
2739  * will allocate the given amount of space.
2740  * If there is not enough room on the holdingdisks, NULL is returned.
2741  */
2742
2743 static assignedhd_t **
2744 find_diskspace(
2745     off_t               size,
2746     int *               cur_idle,
2747     assignedhd_t *      pref)
2748 {
2749     assignedhd_t **result = NULL;
2750     holdalloc_t *ha, *minp;
2751     int i=0;
2752     int j, minj;
2753     char *used;
2754     off_t halloc, dalloc, hfree, dfree;
2755
2756     (void)cur_idle;     /* Quiet unused parameter warning */
2757
2758     if (size < 2*DISK_BLOCK_KB)
2759         size = 2*DISK_BLOCK_KB;
2760     size = am_round(size, (off_t)DISK_BLOCK_KB);
2761
2762     hold_debug(1, _("find_diskspace: want %lld K\n"),
2763                    (long long)size);
2764
2765     used = alloc(SIZEOF(*used) * num_holdalloc);/*disks used during this run*/
2766     memset( used, 0, (size_t)num_holdalloc );
2767     result = alloc(SIZEOF(assignedhd_t *) * (num_holdalloc + 1));
2768     result[0] = NULL;
2769
2770     while( i < num_holdalloc && size > (off_t)0 ) {
2771         /* find the holdingdisk with the fewest active dumpers and among
2772          * those the one with the biggest free space
2773          */
2774         minp = NULL; minj = -1;
2775         for(j = 0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
2776             if( pref && pref->disk == ha && !used[j] &&
2777                 ha->allocated_space <= ha->disksize - (off_t)DISK_BLOCK_KB) {
2778                 minp = ha;
2779                 minj = j;
2780                 break;
2781             }
2782             else if( ha->allocated_space <= ha->disksize - (off_t)(2*DISK_BLOCK_KB) &&
2783                 !used[j] &&
2784                 (!minp ||
2785                  ha->allocated_dumpers < minp->allocated_dumpers ||
2786                  (ha->allocated_dumpers == minp->allocated_dumpers &&
2787                   ha->disksize-ha->allocated_space > minp->disksize-minp->allocated_space)) ) {
2788                 minp = ha;
2789                 minj = j;
2790             }
2791         }
2792
2793         pref = NULL;
2794         if( !minp ) { break; } /* all holding disks are full */
2795         used[minj] = 1;
2796
2797         /* hfree = free space on the disk */
2798         hfree = minp->disksize - minp->allocated_space;
2799
2800         /* dfree = free space for data, remove 1 header for each chunksize */
2801         dfree = hfree - (((hfree-(off_t)1)/holdingdisk_get_chunksize(minp->hdisk))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2802
2803         /* dalloc = space I can allocate for data */
2804         dalloc = ( dfree < size ) ? dfree : size;
2805
2806         /* halloc = space to allocate, including 1 header for each chunksize */
2807         halloc = dalloc + (((dalloc-(off_t)1)/holdingdisk_get_chunksize(minp->hdisk))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2808
2809         hold_debug(1, _("find_diskspace: find diskspace: size %lld hf %lld df %lld da %lld ha %lld\n"),
2810                        (long long)size,
2811                        (long long)hfree,
2812                        (long long)dfree,
2813                        (long long)dalloc,
2814                        (long long)halloc);
2815         size -= dalloc;
2816         result[i] = alloc(SIZEOF(assignedhd_t));
2817         result[i]->disk = minp;
2818         result[i]->reserved = halloc;
2819         result[i]->used = (off_t)0;
2820         result[i]->destname = NULL;
2821         result[i+1] = NULL;
2822         i++;
2823     }
2824     amfree(used);
2825
2826     if(size != (off_t)0) { /* not enough space available */
2827         g_printf(_("find diskspace: not enough diskspace. Left with %lld K\n"), (long long)size);
2828         fflush(stdout);
2829         free_assignedhd(result);
2830         result = NULL;
2831     }
2832
2833     if (debug_holding > 1) {
2834         for( i = 0; result && result[i]; i++ ) {
2835             hold_debug(1, _("find_diskspace: find diskspace: selected %s free %lld reserved %lld dumpers %d\n"),
2836                            holdingdisk_get_diskdir(result[i]->disk->hdisk),
2837                            (long long)(result[i]->disk->disksize -
2838                              result[i]->disk->allocated_space),
2839                            (long long)result[i]->reserved,
2840                            result[i]->disk->allocated_dumpers);
2841         }
2842     }
2843
2844     return result;
2845 }
2846
2847 static int
2848 assign_holdingdisk(
2849     assignedhd_t **     holdp,
2850     disk_t *            diskp)
2851 {
2852     int i, j, c, l=0;
2853     off_t size;
2854     char *sfn = sanitise_filename(diskp->name);
2855     char lvl[64];
2856     assignedhd_t **new_holdp;
2857     char *qname;
2858
2859     g_snprintf( lvl, SIZEOF(lvl), "%d", sched(diskp)->level );
2860
2861     size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
2862                     (off_t)DISK_BLOCK_KB);
2863
2864     for( c = 0; holdp[c]; c++ )
2865         (void)c; /* count number of disks */
2866
2867     /* allocate memory for sched(diskp)->holdp */
2868     for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++)
2869         (void)j;        /* Quiet lint */
2870     new_holdp = (assignedhd_t **)alloc(SIZEOF(assignedhd_t*)*(j+c+1));
2871     if (sched(diskp)->holdp) {
2872         memcpy(new_holdp, sched(diskp)->holdp, j * SIZEOF(*new_holdp));
2873         amfree(sched(diskp)->holdp);
2874     }
2875     sched(diskp)->holdp = new_holdp;
2876     new_holdp = NULL;
2877
2878     i = 0;
2879     if( j > 0 ) { /* This is a request for additional diskspace. See if we can
2880                    * merge assignedhd_t's */
2881         l=j;
2882         if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
2883             sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
2884             holdp[0]->disk->allocated_space += holdp[0]->reserved;
2885             size = (holdp[0]->reserved>size) ? (off_t)0 : size-holdp[0]->reserved;
2886             qname = quote_string(diskp->name);
2887             hold_debug(1, _("assign_holdingdisk: merging holding disk %s to disk %s:%s, add %lld for reserved %lld, left %lld\n"),
2888                            holdingdisk_get_diskdir(
2889                                                sched(diskp)->holdp[j-1]->disk->hdisk),
2890                            diskp->host->hostname, qname,
2891                            (long long)holdp[0]->reserved,
2892                            (long long)sched(diskp)->holdp[j-1]->reserved,
2893                            (long long)size);
2894             i++;
2895             amfree(qname);
2896             amfree(holdp[0]);
2897             l=j-1;
2898         }
2899     }
2900
2901     /* copy assignedhd_s to sched(diskp), adjust allocated_space */
2902     for( ; holdp[i]; i++ ) {
2903         holdp[i]->destname = newvstralloc( holdp[i]->destname,
2904                                            holdingdisk_get_diskdir(holdp[i]->disk->hdisk), "/",
2905                                            hd_driver_timestamp, "/",
2906                                            diskp->host->hostname, ".",
2907                                            sfn, ".",
2908                                            lvl, NULL );
2909         sched(diskp)->holdp[j++] = holdp[i];
2910         holdp[i]->disk->allocated_space += holdp[i]->reserved;
2911         size = (holdp[i]->reserved > size) ? (off_t)0 :
2912                   (size - holdp[i]->reserved);
2913         qname = quote_string(diskp->name);
2914         hold_debug(1,
2915                    _("assign_holdingdisk: %d assigning holding disk %s to disk %s:%s, reserved %lld, left %lld\n"),
2916                     i, holdingdisk_get_diskdir(holdp[i]->disk->hdisk),
2917                     diskp->host->hostname, qname,
2918                     (long long)holdp[i]->reserved,
2919                     (long long)size);
2920         amfree(qname);
2921         holdp[i] = NULL; /* so it doesn't get free()d... */
2922     }
2923     sched(diskp)->holdp[j] = NULL;
2924     amfree(sfn);
2925
2926     return l;
2927 }
2928
2929 static void
2930 adjust_diskspace(
2931     disk_t *    diskp,
2932     cmd_t       cmd)
2933 {
2934     assignedhd_t **holdp;
2935     off_t total = (off_t)0;
2936     off_t diff;
2937     int i;
2938     char *qname, *hqname, *qdest;
2939
2940     (void)cmd;  /* Quiet unused parameter warning */
2941
2942     qname = quote_string(diskp->name);
2943     qdest = quote_string(sched(diskp)->destname);
2944     hold_debug(1, _("adjust_diskspace: %s:%s %s\n"),
2945                    diskp->host->hostname, qname, qdest);
2946
2947     holdp = sched(diskp)->holdp;
2948
2949     assert(holdp != NULL);
2950
2951     for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
2952         diff = holdp[i]->used - holdp[i]->reserved;
2953         total += holdp[i]->used;
2954         holdp[i]->disk->allocated_space += diff;
2955         hqname = quote_string(holdingdisk_name(holdp[i]->disk->hdisk));
2956         hold_debug(1, _("adjust_diskspace: hdisk %s done, reserved %lld used %lld diff %lld alloc %lld dumpers %d\n"),
2957                        holdingdisk_name(holdp[i]->disk->hdisk),
2958                        (long long)holdp[i]->reserved,
2959                        (long long)holdp[i]->used,
2960                        (long long)diff,
2961                        (long long)holdp[i]->disk->allocated_space,
2962                        holdp[i]->disk->allocated_dumpers );
2963         holdp[i]->reserved += diff;
2964         amfree(hqname);
2965     }
2966
2967     sched(diskp)->act_size = total;
2968
2969     hold_debug(1, _("adjust_diskspace: after: disk %s:%s used %lld\n"),
2970                    diskp->host->hostname, qname,
2971                    (long long)sched(diskp)->act_size);
2972     amfree(qdest);
2973     amfree(qname);
2974 }
2975
2976 static void
2977 delete_diskspace(
2978     disk_t *diskp)
2979 {
2980     assignedhd_t **holdp;
2981     int i;
2982
2983     holdp = sched(diskp)->holdp;
2984
2985     assert(holdp != NULL);
2986
2987     for( i = 0; holdp[i]; i++ ) { /* for each disk */
2988         /* find all files of this dump on that disk, and subtract their
2989          * reserved sizes from the disk's allocated space
2990          */
2991         holdp[i]->disk->allocated_space -= holdp[i]->used;
2992     }
2993
2994     holding_file_unlink(holdp[0]->destname);    /* no need for the entire list,
2995                                                  * because holding_file_unlink
2996                                                  * will walk through all files
2997                                                  * using cont_filename */
2998     free_assignedhd(sched(diskp)->holdp);
2999     sched(diskp)->holdp = NULL;
3000     sched(diskp)->act_size = (off_t)0;
3001 }
3002
3003 static assignedhd_t **
3004 build_diskspace(
3005     char *      destname)
3006 {
3007     int i, j;
3008     int fd;
3009     size_t buflen;
3010     char buffer[DISK_BLOCK_BYTES];
3011     dumpfile_t file;
3012     assignedhd_t **result;
3013     holdalloc_t *ha;
3014     off_t *used;
3015     char dirname[1000], *ch;
3016     struct stat finfo;
3017     char *filename = destname;
3018
3019     memset(buffer, 0, sizeof(buffer));
3020     used = alloc(SIZEOF(off_t) * num_holdalloc);
3021     for(i=0;i<num_holdalloc;i++)
3022         used[i] = (off_t)0;
3023     result = alloc(SIZEOF(assignedhd_t *) * (num_holdalloc + 1));
3024     result[0] = NULL;
3025     while(filename != NULL && filename[0] != '\0') {
3026         strncpy(dirname, filename, 999);
3027         dirname[999]='\0';
3028         ch = strrchr(dirname,'/');
3029         if (ch) {
3030             *ch = '\0';
3031             ch = strrchr(dirname,'/');
3032             if (ch) {
3033                 *ch = '\0';
3034             }
3035         }
3036
3037         if (!ch) {
3038             g_fprintf(stderr,_("build_diskspace: bogus filename '%s'\n"), filename);
3039             amfree(used);
3040             amfree(result);
3041             return NULL;
3042         }
3043
3044         for(j = 0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
3045             if(strcmp(dirname, holdingdisk_get_diskdir(ha->hdisk))==0) {
3046                 break;
3047             }
3048         }
3049
3050         if(stat(filename, &finfo) == -1) {
3051             g_fprintf(stderr, _("stat %s: %s\n"), filename, strerror(errno));
3052             finfo.st_size = (off_t)0;
3053         }
3054         used[j] += ((off_t)finfo.st_size+(off_t)1023)/(off_t)1024;
3055         if((fd = open(filename,O_RDONLY)) == -1) {
3056             g_fprintf(stderr,_("build_diskspace: open of %s failed: %s\n"),
3057                     filename, strerror(errno));
3058             amfree(used);
3059             amfree(result);
3060             return NULL;
3061         }
3062         if ((buflen = full_read(fd, buffer, SIZEOF(buffer))) > 0) {;
3063                 parse_file_header(buffer, &file, buflen);
3064         }
3065         close(fd);
3066         filename = file.cont_filename;
3067     }
3068
3069     for(j = 0, i=0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
3070         if(used[j] != (off_t)0) {
3071             result[i] = alloc(SIZEOF(assignedhd_t));
3072             result[i]->disk = ha;
3073             result[i]->reserved = used[j];
3074             result[i]->used = used[j];
3075             result[i]->destname = stralloc(destname);
3076             result[i+1] = NULL;
3077             i++;
3078         }
3079     }
3080
3081     amfree(used);
3082     return result;
3083 }
3084
3085 static void
3086 holdingdisk_state(
3087     char *      time_str)
3088 {
3089     holdalloc_t *ha;
3090     int dsk;
3091     off_t diff;
3092
3093     g_printf(_("driver: hdisk-state time %s"), time_str);
3094
3095     for(ha = holdalloc, dsk = 0; ha != NULL; ha = ha->next, dsk++) {
3096         diff = ha->disksize - ha->allocated_space;
3097         g_printf(_(" hdisk %d: free %lld dumpers %d"), dsk,
3098                (long long)diff, ha->allocated_dumpers);
3099     }
3100     g_printf("\n");
3101 }
3102
3103 static void
3104 update_failed_dump(
3105     disk_t *    dp)
3106 {
3107     time_t save_timestamp = sched(dp)->timestamp;
3108     /* setting timestamp to 0 removes the current level from the
3109      * database, so that we ensure that it will not be bumped to the
3110      * next level on the next run.  If we didn't do this, dumpdates or
3111      * gnutar-lists might have been updated already, and a bumped
3112      * incremental might be created.  */
3113     sched(dp)->timestamp = 0;
3114     update_info_dumper(dp, (off_t)-1, (off_t)-1, (time_t)-1);
3115     sched(dp)->timestamp = save_timestamp;
3116 }
3117
3118 /* ------------------- */
3119 static void
3120 dump_to_tape(
3121     disk_t *    dp)
3122 {
3123     dumper_t *dumper;
3124     cmd_t cmd;
3125     int result_argc;
3126     char **result_argv;
3127     char *qname;
3128     disk_t *dp1;
3129
3130     qname = quote_string(dp->name);
3131     g_printf(_("driver: dumping %s:%s directly to tape\n"),
3132            dp->host->hostname, qname);
3133     fflush(stdout);
3134
3135     /* pick a dumper and fail if there are no idle dumpers */
3136
3137     dumper = idle_dumper();
3138     if (!dumper) {
3139         g_printf(_("driver: no idle dumpers for %s:%s.\n"), 
3140                 dp->host->hostname, qname);
3141         fflush(stdout);
3142         log_add(L_WARNING, _("no idle dumpers for %s:%s.\n"),
3143                 dp->host->hostname, qname);
3144         amfree(qname);
3145         return; /* fatal problem */
3146     }
3147
3148     /* tell the taper to read from a port number of its choice */
3149
3150     taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
3151     cmd = getresult(taper, 1, &result_argc, &result_argv);
3152     if(cmd != PORT) {
3153         g_printf(_("driver: did not get PORT from taper for %s:%s\n"),
3154                 dp->host->hostname, qname);
3155         fflush(stdout);
3156         log_add(L_WARNING, _("driver: did not get PORT from taper for %s:%s.\n"),
3157                 dp->host->hostname, qname);
3158         amfree(qname);
3159         return; /* fatal problem */
3160     }
3161     amfree(qname);
3162
3163     /* copy port number */
3164     dumper->output_port = atoi(result_argv[1]);
3165
3166     dumper->dp = dp;
3167     dumper->chunker = NULL;
3168     dumper->result = LAST_TOK;
3169     taper_result = LAST_TOK;
3170     sched(dp)->dumper = dumper;
3171
3172     if (dp->host->pre_script == 0) {
3173         for (dp1=dp->host->disks; dp1 != NULL; dp1 = dp1->hostnext) {
3174             run_server_scripts(EXECUTE_ON_PRE_HOST_BACKUP,
3175                                get_config_name(), dp1, -1);
3176         }
3177         dp->host->pre_script = 1;
3178     }
3179     run_server_scripts(EXECUTE_ON_PRE_DLE_BACKUP, get_config_name(), dp,
3180                        sched(dp)->level);
3181
3182     /* tell the dumper to dump to a port */
3183     dumper_cmd(dumper, PORT_DUMP, dp, NULL);
3184     dp->host->start_t = time(NULL) + 15;
3185
3186     /* update statistics & print state */
3187
3188     taper_busy = dumper->busy = 1;
3189     taper_input_error = NULL;
3190     taper_tape_error = NULL;
3191     taper_dumper = dumper;
3192     taper_disk = dp;
3193     taper_first_label = NULL;
3194     taper_written = 0;
3195     taper_state |= TAPER_STATE_DUMP_TO_TAPE;
3196     sched(dp)->act_size = sched(dp)->est_size;
3197     dp->host->inprogress += 1;
3198     dp->inprogress = 1;
3199     sched(dp)->timestamp = time((time_t *)0);
3200     allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
3201     idle_reason = NOT_IDLE;
3202
3203     short_dump_state();
3204
3205     dumper->ev_read = event_register(dumper->fd, EV_READFD,
3206                                      handle_dumper_result, dumper);
3207     taper_ev_read = event_register(taper, EV_READFD,
3208                                    handle_taper_result, NULL);
3209
3210     g_strfreev(result_argv);
3211 }
3212
3213 static int
3214 queue_length(
3215     disklist_t  q)
3216 {
3217     disk_t *p;
3218     int len;
3219
3220     for(len = 0, p = q.head; p != NULL; len++, p = p->next)
3221         (void)len;      /* Quiet lint */
3222     return len;
3223 }
3224
3225 static void
3226 short_dump_state(void)
3227 {
3228     int i, nidle;
3229     char *wall_time;
3230
3231     wall_time = walltime_str(curclock());
3232
3233     g_printf(_("driver: state time %s "), wall_time);
3234     g_printf(_("free kps: %lu space: %lld taper: "),
3235            free_kps(NULL),
3236            (long long)free_space());
3237     if(degraded_mode) g_printf(_("DOWN"));
3238     else if(!taper_busy) g_printf(_("idle"));
3239     else g_printf(_("writing"));
3240     nidle = 0;
3241     for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
3242     g_printf(_(" idle-dumpers: %d"), nidle);
3243     g_printf(_(" qlen tapeq: %d"), queue_length(tapeq));
3244     g_printf(_(" runq: %d"), queue_length(runq));
3245     g_printf(_(" roomq: %d"), queue_length(roomq));
3246     g_printf(_(" wakeup: %d"), (int)sleep_time);
3247     g_printf(_(" driver-idle: %s\n"), _(idle_strings[idle_reason]));
3248     interface_state(wall_time);
3249     holdingdisk_state(wall_time);
3250     fflush(stdout);
3251 }
3252
/*
 * Decide what should happen with the tape, from the current taper state and
 * from how much data is being dumped, is queued to run, and is waiting in
 * the tape queue.
 *
 * why_no_new_tape: output parameter.  It is written only when
 *                  TAPE_ACTION_NO_NEW_TAPE is part of the result; in every
 *                  other case it is left untouched.
 *
 * Returns `result`, which starts as TAPE_ACTION_NO_ACTION and may get
 * or-ed with:
 *   TAPE_ACTION_NEW_TAPE       - criteria to start writing to a new tape met
 *   TAPE_ACTION_NO_NEW_TAPE    - criteria to stop using a new tape met
 *                                (never set together with NEW_TAPE: the two
 *                                are the arms of one if/else-if)
 *   TAPE_ACTION_START_A_FLUSH  - criteria to start a flush met
 */
3253 static TapeAction tape_action(char **why_no_new_tape)
3254 {
3255     TapeAction result = TAPE_ACTION_NO_ACTION;
3256     dumper_t *dumper;
3257     disk_t   *dp;
3258     off_t dumpers_size;
3259     off_t runq_size;
3260     off_t directq_size;
3261     off_t tapeq_size;
3262     off_t sched_size;
3263     off_t dump_to_disk_size;
3264     int   dump_to_disk_terminated;
3265
         // Sum of the estimated sizes of the dumps held by busy dumpers.
3266     dumpers_size = 0;
3267     for(dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
3268         if (dumper->busy)
3269             dumpers_size += sched(dumper->dp)->est_size;
3270     }
3271     driver_debug(1, _("dumpers_size: %lld\n"), (long long)dumpers_size);
3272
         // Sum of the estimated sizes of everything still in runq.
3273     runq_size = 0;
3274     for(dp = runq.head; dp != NULL; dp = dp->next) {
3275         runq_size += sched(dp)->est_size;
3276     }
3277     driver_debug(1, _("runq_size: %lld\n"), (long long)runq_size);
3278
         // Sum of the estimated sizes in directq.  This value is only
         // reported here; it is not part of sched_size below.
3279     directq_size = 0;
3280     for(dp = directq.head; dp != NULL; dp = dp->next) {
3281         directq_size += sched(dp)->est_size;
3282     }
3283     driver_debug(1, _("directq_size: %lld\n"), (long long)directq_size);
3284
         // tapeq entries are counted with act_size (not est_size).  When
         // taper_disk is set, what is left of it (act_size minus
         // taper_written) is added too -- presumably the dump the taper is
         // currently writing; confirm against the taper code.
3285     tapeq_size = 0;
3286     for(dp = tapeq.head; dp != NULL; dp = dp->next) {
3287         tapeq_size += sched(dp)->act_size;
3288     }
3289     if (taper_disk) {
3290         tapeq_size += sched(taper_disk)->act_size - taper_written;
3291     }
3292     driver_debug(1, _("tapeq_size: %lld\n"), (long long)tapeq_size);
3293
3294     sched_size = runq_size + tapeq_size + dumpers_size;
3295     driver_debug(1, _("sched_size: %lld\n"), (long long)sched_size);
3296
3297     dump_to_disk_size = dumpers_size + runq_size;
3298     driver_debug(1, _("dump_to_disk_size: %lld\n"), (long long)dump_to_disk_size);
3299
         // True once schedule_done is set and nothing is dumping or waiting in runq.
3300     dump_to_disk_terminated = schedule_done && dump_to_disk_size == 0;
3301
3302     // Changing conditionals can produce a driver hang, take care.
3303     // 
3304     // when to start writing to a new tape
3305     if ((taper_state & TAPER_STATE_WAIT_FOR_TAPE) &&
3306         ((taper_state & TAPER_STATE_DUMP_TO_TAPE) ||    // for dump to tape
3307          !empty(directq) ||                             // if a dle is waiting for a dump to tape
3308          !empty(roomq) ||                               // holding disk constraint
3309          idle_reason == IDLE_NO_DISKSPACE ||            // holding disk constraint
3310          (flush_threshold_dumped < tapeq_size &&        // flush-threshold-dumped &&
3311           flush_threshold_scheduled < sched_size) ||    //  flush-threshold-scheduled
3312          (taperflush < tapeq_size &&                    // taperflush
3313           (force_flush == 1 ||                          //  if force_flush
3314            dump_to_disk_terminated))                    //  or all dump to disk terminated
3315         )) {
3316         result |= TAPE_ACTION_NEW_TAPE;
3317     // when to stop using new tape
3318     } else if ((taper_state & TAPER_STATE_WAIT_FOR_TAPE) &&
3319                (taperflush >= tapeq_size &&             // taperflush criteria not met
3320                 (force_flush == 1 ||                    //  if force_flush
3321                  dump_to_disk_terminated))              //  or all dump to disk terminated
3322               ) {
3323         result |= TAPE_ACTION_NO_NEW_TAPE;
             // Report the first criterion found unmet, in this fixed order.
3324         if (flush_threshold_dumped >= tapeq_size) {
3325             *why_no_new_tape = _("flush-threshold-dumped criteria not met");
3326         } else if (flush_threshold_scheduled >= sched_size) {
3327             *why_no_new_tape = _("flush-threshold-scheduled criteria not met");
3328         } else {
                 // NOTE(review): this arm looks unreachable.  To get here the
                 // "new tape" test above must have failed with
                 // TAPER_STATE_WAIT_FOR_TAPE set, which means the
                 // flush-threshold pair was not both satisfied, so one of the
                 // two comparisons above already holds.  Confirm before
                 // relying on this message.
3329             *why_no_new_tape = _("taperflush criteria not met");
3330         }
3331     }
3332
3333     // when to start a flush
3334     // We don't start a flush if TAPER_STATE_TAPE_STARTED is set && dump_to_disk_terminated && force_flush == 0;
3335     // that is the criterion needed to exit the first event_loop without flushing everything to tape,
3336     // the remaining dumps will be flushed in another event_loop.
3337     if (!degraded_mode && !taper_busy && !empty(tapeq) &&
3338         (!((taper_state & TAPER_STATE_TAPE_STARTED) &&
3339             dump_to_disk_terminated && force_flush == 0) ||     // unless tape already started, dump to disk terminated and no force_flush
3340          ((taper_state & TAPER_STATE_TAPE_STARTED) &&
3341           force_flush == 1) ||                                  // if tape already started and force_flush
3342          !empty(roomq) ||                                       // holding disk constraint
3343          idle_reason == IDLE_NO_DISKSPACE ||                    // holding disk constraint
3344          (flush_threshold_dumped < tapeq_size &&                // flush-threshold-dumped &&
3345          flush_threshold_scheduled < sched_size) ||             //  flush-threshold-scheduled
3346          (force_flush == 1 && taperflush < tapeq_size))) {      // taperflush if force_flush
3347         result |= TAPE_ACTION_START_A_FLUSH;
3348     }
3349     return result;
3350 }
3351
3352 #if 0
3353 static void
3354 dump_state(
3355     const char *str)
3356 {
3357     int i;
3358     disk_t *dp;
3359     char *qname;
3360
3361     g_printf("================\n");
3362     g_printf(_("driver state at time %s: %s\n"), walltime_str(curclock()), str);
3363     g_printf(_("free kps: %lu, space: %lld\n"),
3364            free_kps(NULL),
3365            (long long)free_space());
3366     if(degraded_mode) g_printf(_("taper: DOWN\n"));
3367     else if(!taper_busy) g_printf(_("taper: idle\n"));
3368     else g_printf(_("taper: writing %s:%s.%d est size %lld\n"),
3369                 taper_disk->host->hostname, taper_disk->name,
3370                 sched(taper_disk)->level,
3371                 (long long)sched(taper_disk)->est_size);
3372     for(i = 0; i < inparallel; i++) {
3373         dp = dmptable[i].dp;
3374         if(!dmptable[i].busy)
3375           g_printf(_("%s: idle\n"), dmptable[i].name);
3376         else
3377           qname = quote_string(dp->name);
3378           g_printf(_("%s: dumping %s:%s.%d est kps %d size %lld time %lu\n"),
3379                 dmptable[i].name, dp->host->hostname, qname, sched(dp)->level,
3380                 sched(dp)->est_kps, (long long)sched(dp)->est_size, sched(dp)->est_time);
3381           amfree(qname);
3382     }
3383     dump_queue("TAPE", tapeq, 5, stdout);
3384     dump_queue("ROOM", roomq, 5, stdout);
3385     dump_queue("RUN ", runq, 5, stdout);
3386     g_printf("================\n");
3387     fflush(stdout);
3388 }
3389 #endif