Imported Upstream version 2.5.1
[debian/amanda] / server-src / driver.c
1 /*
2  * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3  * Copyright (c) 1991-1998 University of Maryland at College Park
4  * All Rights Reserved.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of U.M. not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  U.M. makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Authors: the Amanda Development Team.  Its members are listed in a
24  * file named AUTHORS, in the root directory of this distribution.
25  */
26 /*
27  * $Id: driver.c,v 1.198 2006/08/24 01:57:16 paddy_s Exp $
28  *
29  * controlling process for the Amanda backup system
30  */
31
32 /*
33  * XXX possibly modify tape queue to be cognizant of how much room is left on
34  *     tape.  Probably not effective though, should do this in planner.
35  */
36
37 #define HOLD_DEBUG
38
39 #include "amanda.h"
40 #include "clock.h"
41 #include "conffile.h"
42 #include "diskfile.h"
43 #include "event.h"
44 #include "holding.h"
45 #include "infofile.h"
46 #include "logfile.h"
47 #include "statfs.h"
48 #include "version.h"
49 #include "driverio.h"
50 #include "server_util.h"
51
52 static disklist_t waitq, runq, tapeq, roomq;
53 static int pending_aborts;
54 static disk_t *taper_disk;
55 static int degraded_mode;
56 static off_t reserved_space;
57 static off_t total_disksize;
58 static char *dumper_program;
59 static char *chunker_program;
60 static int  inparallel;
61 static int nodump = 0;
62 static off_t tape_length = (off_t)0;
63 static off_t tape_left = (off_t)0;
64 static int current_tape = 1;
65 static int conf_taperalgo;
66 static int conf_runtapes;
67 static time_t sleep_time;
68 static int idle_reason;
69 static char *driver_timestamp;
70 static char *hd_driver_timestamp;
71 static am_host_t *flushhost = NULL;
72 static int need_degraded=0;
73
74 static event_handle_t *dumpers_ev_time = NULL;
75 static event_handle_t *schedule_ev_read = NULL;
76
77 static int wait_children(int count);
78 static void wait_for_children(void);
79 static void allocate_bandwidth(interface_t *ip, unsigned long kps);
80 static int assign_holdingdisk(assignedhd_t **holdp, disk_t *diskp);
81 static void adjust_diskspace(disk_t *diskp, cmd_t cmd);
82 static void delete_diskspace(disk_t *diskp);
83 static assignedhd_t **build_diskspace(char *destname);
84 static int client_constrained(disk_t *dp);
85 static void deallocate_bandwidth(interface_t *ip, unsigned long kps);
86 static void dump_schedule(disklist_t *qp, char *str);
87 static int dump_to_tape(disk_t *dp);
88 static assignedhd_t **find_diskspace(off_t size, int *cur_idle,
89                                         assignedhd_t *preferred);
90 static unsigned long free_kps(interface_t *ip);
91 static off_t free_space(void);
92 static void dumper_result(disk_t *dp);
93 static void handle_dumper_result(void *);
94 static void handle_chunker_result(void *);
95 static void handle_dumpers_time(void *);
96 static void handle_taper_result(void *);
97 static void holdingdisk_state(char *time_str);
98 static dumper_t *idle_dumper(void);
99 static void interface_state(char *time_str);
100 static int queue_length(disklist_t q);
101 static disklist_t read_flush(void);
102 static void read_schedule(void *cookie);
103 static void short_dump_state(void);
104 static void startaflush(void);
105 static void start_degraded_mode(disklist_t *queuep);
106 static void start_some_dumps(disklist_t *rq);
107 static void continue_port_dumps(void);
108 static void update_failed_dump_to_tape(disk_t *);
109 #if 0
110 static void dump_state(const char *str);
111 #endif
112 int main(int main_argc, char **main_argv);
113
/*
 * Idle reason codes.  Each code is the index of its printable name in
 * idle_strings[], so the two lists must be kept in the same order.
 */
#define NOT_IDLE                0
#define IDLE_NO_DUMPERS         1
#define IDLE_START_WAIT         2
#define IDLE_NO_HOLD            3
#define IDLE_CLIENT_CONSTRAINED 4
#define IDLE_NO_DISKSPACE       5
#define IDLE_TOO_LARGE          6
#define IDLE_NO_BANDWIDTH       7
#define IDLE_TAPER_WAIT         8

static const char *idle_strings[] = {
    "not-idle",                 /* NOT_IDLE */
    "no-dumpers",               /* IDLE_NO_DUMPERS */
    "start-wait",               /* IDLE_START_WAIT */
    "no-hold",                  /* IDLE_NO_HOLD */
    "client-constrained",       /* IDLE_CLIENT_CONSTRAINED */
    "no-diskspace",             /* IDLE_NO_DISKSPACE */
    "file-too-large",           /* IDLE_TOO_LARGE */
    "no-bandwidth",             /* IDLE_NO_BANDWIDTH */
    "taper-wait",               /* IDLE_TAPER_WAIT */
};
134
135 int
136 main(
137     int         main_argc,
138     char **     main_argv)
139 {
140     disklist_t origq;
141     disk_t *diskp;
142     int dsk;
143     dumper_t *dumper;
144     char *newdir = NULL;
145     generic_fs_stats_t fs;
146     holdingdisk_t *hdp;
147     unsigned long malloc_hist_1, malloc_size_1;
148     unsigned long malloc_hist_2, malloc_size_2;
149     unsigned long reserve = 100;
150     char *conffile;
151     char *conf_diskfile;
152     cmd_t cmd;
153     int result_argc;
154     char *result_argv[MAX_ARGS+1];
155     char *taper_program;
156     char *conf_tapetype;
157     tapetype_t *tape;
158     char *line;
159     int    new_argc,   my_argc;
160     char **new_argv, **my_argv;
161
162     safe_fd(-1, 0);
163
164     setvbuf(stdout, (char *)NULL, (int)_IOLBF, 0);
165     setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
166
167     set_pname("driver");
168
169     dbopen(DBG_SUBDIR_SERVER);
170
171     atexit(wait_for_children);
172
173     /* Don't die when child closes pipe */
174     signal(SIGPIPE, SIG_IGN);
175
176     malloc_size_1 = malloc_inuse(&malloc_hist_1);
177
178     erroutput_type = (ERR_AMANDALOG|ERR_INTERACTIVE);
179     set_logerror(logerror);
180
181     startclock();
182
183     parse_server_conf(main_argc, main_argv, &new_argc, &new_argv);
184     my_argc = new_argc;
185     my_argv = new_argv;
186
187     printf("%s: pid %ld executable %s version %s\n",
188            get_pname(), (long) getpid(), my_argv[0], version());
189
190     if (my_argc > 1) {
191         config_name = stralloc(my_argv[1]);
192         config_dir = vstralloc(CONFIG_DIR, "/", config_name, "/", NULL);
193         if(my_argc > 2) {
194             if(strncmp(my_argv[2], "nodump", 6) == 0) {
195                 nodump = 1;
196             }
197         }
198
199     } else {
200
201         char my_cwd[STR_SIZE];
202
203         if (getcwd(my_cwd, SIZEOF(my_cwd)) == NULL) {
204             error("cannot determine current working directory");
205             /*NOTREACHED*/
206         }
207         config_dir = stralloc2(my_cwd, "/");
208         if ((config_name = strrchr(my_cwd, '/')) != NULL) {
209             config_name = stralloc(config_name + 1);
210         }
211     }
212
213     safe_cd();
214
215     conffile = stralloc2(config_dir, CONFFILE_NAME);
216     if(read_conffile(conffile)) {
217         error("errors processing config file \"%s\"", conffile);
218         /*NOTREACHED*/
219     }
220     amfree(conffile);
221
222     dbrename(config_name, DBG_SUBDIR_SERVER);
223
224     report_bad_conf_arg();
225
226     amfree(driver_timestamp);
227     /* read timestamp from stdin */
228     while ((line = agets(stdin)) != NULL) {
229         if (line[0] != '\0')
230             break;
231         amfree(line);
232     }
233     if ( line == NULL ) {
234       error("Did not get DATE line from planner");
235       /*NOTREACHED*/
236     }
237     driver_timestamp = alloc(15);
238     strncpy(driver_timestamp, &line[5], 14);
239     driver_timestamp[14] = '\0';
240     amfree(line);
241     log_add(L_START,"date %s", driver_timestamp);
242
243     /* check that we don't do many dump in a day and usetimestamps is off */
244     if(strlen(driver_timestamp) == 8) {
245         char *conf_logdir = getconf_str(CNF_LOGDIR);
246         char *logfile    = vstralloc(conf_logdir, "/log.",
247                                      driver_timestamp, ".0", NULL);
248         char *oldlogfile = vstralloc(conf_logdir, "/oldlog/log.",
249                                      driver_timestamp, ".0", NULL);
250         if(access(logfile, F_OK) == 0 || access(oldlogfile, F_OK) == 0) {
251             log_add(L_WARNING, "WARNING: This is not the first amdump run today. Enable the usetimestamps option in the configuration file if you want to run amdump more than once per calendar day.");
252         }
253         amfree(oldlogfile);
254         amfree(logfile);
255         hd_driver_timestamp = construct_timestamp(NULL);
256     }
257     else {
258         hd_driver_timestamp = stralloc(driver_timestamp);
259     }
260
261     taper_program = vstralloc(libexecdir, "/", "taper", versionsuffix(), NULL);
262     dumper_program = vstralloc(libexecdir, "/", "dumper", versionsuffix(),
263                                NULL);
264     chunker_program = vstralloc(libexecdir, "/", "chunker", versionsuffix(),
265                                NULL);
266
267     conf_taperalgo = getconf_taperalgo(CNF_TAPERALGO);
268     conf_tapetype = getconf_str(CNF_TAPETYPE);
269     conf_runtapes = getconf_int(CNF_RUNTAPES);
270     tape = lookup_tapetype(conf_tapetype);
271     tape_length = tapetype_get_length(tape);
272     printf("driver: tape size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)tape_length);
273
274     /* start initializing: read in databases */
275
276     conf_diskfile = getconf_str(CNF_DISKFILE);
277     if (*conf_diskfile == '/') {
278         conf_diskfile = stralloc(conf_diskfile);
279     } else {
280         conf_diskfile = stralloc2(config_dir, conf_diskfile);
281     }
282     if (read_diskfile(conf_diskfile, &origq) < 0) {
283         error("could not load disklist \"%s\"", conf_diskfile);
284         /*NOTREACHED*/
285     }
286     amfree(conf_diskfile);
287
288     /* set up any configuration-dependent variables */
289
290     inparallel  = getconf_int(CNF_INPARALLEL);
291
292     reserve = (unsigned long)getconf_int(CNF_RESERVE);
293
294     total_disksize = (off_t)0;
295     for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
296         hdp->up = (void *)alloc(SIZEOF(holdalloc_t));
297         holdalloc(hdp)->allocated_dumpers = 0;
298         holdalloc(hdp)->allocated_space = (off_t)0;
299
300         if(get_fs_stats(holdingdisk_get_diskdir(hdp), &fs) == -1
301            || access(holdingdisk_get_diskdir(hdp), W_OK) == -1) {
302             log_add(L_WARNING, "WARNING: ignoring holding disk %s: %s\n",
303                     holdingdisk_get_diskdir(hdp), strerror(errno));
304             hdp->disksize = 0L;
305             continue;
306         }
307
308         if(fs.avail != (off_t)-1) {
309             if(hdp->disksize > (off_t)0) {
310                 if(hdp->disksize > fs.avail) {
311                     log_add(L_WARNING,
312                             "WARNING: %s: " OFF_T_FMT " KB requested, "
313                             "but only " OFF_T_FMT " KB available.",
314                             holdingdisk_get_diskdir(hdp),
315                             (OFF_T_FMT_TYPE)hdp->disksize,
316                             (OFF_T_FMT_TYPE)fs.avail);
317                             hdp->disksize = fs.avail;
318                 }
319             }
320             else if((fs.avail + hdp->disksize) < (off_t)0) {
321                 log_add(L_WARNING,
322                         "WARNING: %s: not " OFF_T_FMT " KB free.",
323                         holdingdisk_get_diskdir(hdp), -hdp->disksize);
324                 hdp->disksize = (off_t)0;
325                 continue;
326             }
327             else
328                 hdp->disksize += fs.avail;
329         }
330
331         printf("driver: adding holding disk %d dir %s size "
332                 OFF_T_FMT " chunksize " OFF_T_FMT "\n",
333                dsk, holdingdisk_get_diskdir(hdp),
334                (OFF_T_FMT_TYPE)hdp->disksize,
335                (OFF_T_FMT_TYPE)(holdingdisk_get_chunksize(hdp)));
336
337         newdir = newvstralloc(newdir,
338                               holdingdisk_get_diskdir(hdp), "/", hd_driver_timestamp,
339                               NULL);
340         if(!mkholdingdir(newdir)) {
341             hdp->disksize = (off_t)0;
342         }
343         total_disksize += hdp->disksize;
344     }
345
346     reserved_space = total_disksize * (off_t)(reserve / 100);
347
348     printf("reserving " OFF_T_FMT " out of " OFF_T_FMT
349            " for degraded-mode dumps\n",
350            (OFF_T_FMT_TYPE)reserved_space, (OFF_T_FMT_TYPE)free_space());
351
352     amfree(newdir);
353
354     if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
355
356     /* taper takes a while to get going, so start it up right away */
357
358     init_driverio();
359     if(conf_runtapes > 0) {
360         startup_tape_process(taper_program);
361         taper_cmd(START_TAPER, driver_timestamp, NULL, 0, NULL);
362     }
363
364     /* fire up the dumpers now while we are waiting */
365     if(!nodump) startup_dump_processes(dumper_program, inparallel, driver_timestamp);
366
367     /*
368      * Read schedule from stdin.  Usually, this is a pipe from planner,
369      * so the effect is that we wait here for the planner to
370      * finish, but meanwhile the taper is rewinding the tape, reading
371      * the label, checking it, writing a new label and all that jazz
372      * in parallel with the planner.
373      */
374
375     runq.head = NULL;
376     runq.tail = NULL;
377     waitq = origq;
378     tapeq = read_flush();
379
380     roomq.head = roomq.tail = NULL;
381
382     log_add(L_STATS, "startup time %s", walltime_str(curclock()));
383
384     printf("driver: start time %s inparallel %d bandwidth %lu diskspace "
385            OFF_T_FMT " ", walltime_str(curclock()), inparallel,
386            free_kps((interface_t *)0), (OFF_T_FMT_TYPE)free_space());
387     printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
388            "OBSOLETE", driver_timestamp, taperalgo2str(conf_taperalgo),
389            getconf_str(CNF_DUMPORDER));
390     fflush(stdout);
391
392     /* ok, planner is done, now lets see if the tape is ready */
393
394     if(conf_runtapes > 0) {
395         cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
396
397         if(cmd != TAPER_OK) {
398             /* no tape, go into degraded mode: dump to holding disk */
399             need_degraded=1;
400         }
401     }
402     else {
403         need_degraded=1;
404     }
405
406     tape_left = tape_length;
407     taper_busy = 0;
408     taper_disk = NULL;
409     taper_ev_read = NULL;
410     if(!need_degraded) startaflush();
411
412     if(!nodump)
413         schedule_ev_read = event_register((event_id_t)0, EV_READFD, read_schedule, NULL);
414
415     short_dump_state();
416     event_loop(0);
417
418     /* handle any remaining dumps by dumping directly to tape, if possible */
419
420     while(!empty(runq) && taper > 0) {
421         diskp = dequeue_disk(&runq);
422         if (diskp->to_holdingdisk == HOLD_REQUIRED) {
423             log_add(L_FAIL, "%s %s %s %d [%s]",
424                 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
425                 sched(diskp)->level,
426                 "can't dump required holdingdisk");
427         }
428         else if (!degraded_mode) {
429             int rc = dump_to_tape(diskp);
430             if(rc == 1)
431                 log_add(L_INFO,
432                         "%s %s %d [dump to tape failed, will try again]",
433                         diskp->host->hostname,
434                         diskp->name,
435                         sched(diskp)->level);
436             else if(rc == 2)
437                 log_add(L_FAIL, "%s %s %s %d [dump to tape failed]",
438                         diskp->host->hostname,
439                         diskp->name,
440                         sched(diskp)->datestamp,
441                         sched(diskp)->level);
442         }
443         else
444             log_add(L_FAIL, "%s %s %s %d [%s]",
445                 diskp->host->hostname, diskp->name, sched(diskp)->datestamp,
446                 sched(diskp)->level,
447                 diskp->to_holdingdisk == HOLD_AUTO ?
448                     "no more holding disk space" :
449                     "can't dump no-hold disk in degraded mode");
450     }
451
452     short_dump_state();                         /* for amstatus */
453
454     printf("driver: QUITTING time %s telling children to quit\n",
455            walltime_str(curclock()));
456     fflush(stdout);
457
458     if(!nodump) {
459         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
460             if(dumper->fd >= 0)
461                 dumper_cmd(dumper, QUIT, NULL);
462         }
463     }
464
465     if(taper >= 0) {
466         taper_cmd(QUIT, NULL, NULL, 0, NULL);
467     }
468
469     /* wait for all to die */
470     wait_children(600);
471
472     for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
473         cleanup_holdingdisk(holdingdisk_get_diskdir(hdp), 0);
474         amfree(hdp->up);
475     }
476     amfree(newdir);
477
478     check_unfree_serial();
479     printf("driver: FINISHED time %s\n", walltime_str(curclock()));
480     fflush(stdout);
481     log_add(L_FINISH,"date %s time %s", driver_timestamp, walltime_str(curclock()));
482     amfree(driver_timestamp);
483
484     free_new_argv(new_argc, new_argv);
485     amfree(dumper_program);
486     amfree(taper_program);
487     amfree(config_dir);
488     amfree(config_name);
489
490     malloc_size_2 = malloc_inuse(&malloc_hist_2);
491
492     if(malloc_size_1 != malloc_size_2) {
493         malloc_list(fileno(stderr), malloc_hist_1, malloc_hist_2);
494     }
495
496     dbclose();
497
498     return 0;
499 }
500
501 /* sleep up to count seconds, and wait for terminating child process */
502 /* if sleep is negative, this function will not timeout              */
503 /* exit once all child process are finished or the timout expired    */
504 /* return 0 if no more children to wait                              */
505 /* return 1 if some children are still alive                         */
506 static int
507 wait_children(int count)
508 {
509     pid_t     pid;
510     amwait_t  retstat;
511     char     *who;
512     char     *what;
513     int       code=0;
514     dumper_t *dumper;
515     int       wait_errno;
516
517     do {
518         do {
519             pid = waitpid((pid_t)-1, &retstat, WNOHANG);
520             wait_errno = errno;
521             if (pid > 0) {
522                 what = NULL;
523                 if (! WIFEXITED(retstat)) {
524                     what = "signal";
525                     code = WTERMSIG(retstat);
526                 } else if (WEXITSTATUS(retstat) != 0) {
527                     what = "code";
528                     code = WEXITSTATUS(retstat);
529                 }
530                 who = NULL;
531                 for (dumper = dmptable; dumper < dmptable + inparallel;
532                      dumper++) {
533                     if (pid == dumper->pid) {
534                         who = stralloc(dumper->name);
535                         dumper->pid = -1;
536                         break;
537                     }
538                     if (pid == dumper->chunker->pid) {
539                         who = stralloc(dumper->chunker->name);
540                         dumper->chunker->pid = -1;
541                         break;
542                     }
543                 }
544                 if (who == NULL && pid == taper_pid) {
545                     who = stralloc("taper");
546                     taper_pid = -1;
547                 }
548                 if(what != NULL && who == NULL) {
549                     who = stralloc("unknown");
550                 }
551                 if(who && what) {
552                     log_add(L_WARNING, "%s pid %u exited with %s %d\n", who, 
553                             (unsigned)pid, what, code);
554                     printf("driver: %s pid %u exited with %s %d\n", who,
555                            (unsigned)pid, what, code);
556                 }
557                 amfree(who);
558             }
559         } while (pid > 0 || wait_errno == EINTR);
560         if (errno != ECHILD)
561             sleep(1);
562         if (count > 0)
563             count--;
564     } while ((errno != ECHILD) && (count != 0));
565     return (errno != ECHILD);
566 }
567
568 static void
569 kill_children(int signal)
570 {
571     dumper_t *dumper;
572
573     if(!nodump) {
574         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
575             if (!dumper->down && dumper->pid > 1) {
576                 printf("driver: sending signal %d to %s pid %u\n", signal,
577                        dumper->name, (unsigned)dumper->pid);
578                 if (kill(dumper->pid, signal) == -1 && errno == ESRCH) {
579                     if (dumper->chunker)
580                         dumper->chunker->pid = 0;
581                 }
582                 if (dumper->chunker && dumper->chunker->pid > 1) {
583                     printf("driver: sending signal %d to %s pid %u\n", signal,
584                            dumper->chunker->name,
585                            (unsigned)dumper->chunker->pid);
586                     if (kill(dumper->chunker->pid, signal) == -1 &&
587                         errno == ESRCH)
588                         dumper->chunker->pid = 0;
589                 }
590             }
591         }
592     }
593
594     if(taper_pid > 1)
595         printf("driver: sending signal %d to %s pid %u\n", signal,
596                "taper", (unsigned)taper_pid);
597         if (kill(taper_pid, signal) == -1 && errno == ESRCH)
598             taper_pid = 0;
599 }
600
601 static void
602 wait_for_children(void)
603 {
604     dumper_t *dumper;
605
606     if(!nodump) {
607         for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
608             if (dumper->pid > 1 && dumper->fd >= 0) {
609                 dumper_cmd(dumper, QUIT, NULL);
610                 if (dumper->chunker && dumper->chunker->pid > 1 &&
611                     dumper->chunker->fd >= 0)
612                     chunker_cmd(dumper->chunker, QUIT, NULL);
613             }
614         }
615     }
616
617     if(taper_pid > 1 && taper > 0) {
618         taper_cmd(QUIT, NULL, NULL, 0, NULL);
619     }
620
621     if(wait_children(60) == 0)
622         return;
623
624     kill_children(SIGHUP);
625     if(wait_children(60) == 0)
626         return;
627
628     kill_children(SIGKILL);
629     if(wait_children(-1) == 0)
630         return;
631
632 }
633
634 static void
635 startaflush(void)
636 {
637     disk_t *dp = NULL;
638     disk_t *fit = NULL;
639     char *datestamp;
640     int extra_tapes = 0;
641     char *qname;
642
643     if(!degraded_mode && !taper_busy && !empty(tapeq)) {
644         
645         datestamp = sched(tapeq.head)->datestamp;
646         switch(conf_taperalgo) {
647         case ALGO_FIRST:
648                 dp = dequeue_disk(&tapeq);
649                 break;
650         case ALGO_FIRSTFIT:
651                 fit = tapeq.head;
652                 while (fit != NULL) {
653                     extra_tapes = (fit->tape_splitsize > (off_t)0) ? 
654                                         conf_runtapes - current_tape : 0;
655                     if(sched(fit)->act_size <= (tape_left +
656                              tape_length * (off_t)extra_tapes) &&
657                              strcmp(sched(fit)->datestamp, datestamp) <= 0) {
658                         dp = fit;
659                         fit = NULL;
660                     }
661                     else {
662                         fit = fit->next;
663                     }
664                 }
665                 if(dp) remove_disk(&tapeq, dp);
666                 break;
667         case ALGO_LARGEST:
668                 fit = dp = tapeq.head;
669                 while (fit != NULL) {
670                     if(sched(fit)->act_size > sched(dp)->act_size &&
671                        strcmp(sched(fit)->datestamp, datestamp) <= 0) {
672                         dp = fit;
673                     }
674                     fit = fit->next;
675                 }
676                 if(dp) remove_disk(&tapeq, dp);
677                 break;
678         case ALGO_LARGESTFIT:
679                 fit = tapeq.head;
680                 while (fit != NULL) {
681                     extra_tapes = (fit->tape_splitsize > (off_t)0) ? 
682                                         conf_runtapes - current_tape : 0;
683                     if(sched(fit)->act_size <=
684                        (tape_left + tape_length * (off_t)extra_tapes) &&
685                        (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
686                        strcmp(sched(fit)->datestamp, datestamp) <= 0) {
687                         dp = fit;
688                     }
689                     fit = fit->next;
690                 }
691                 if(dp) remove_disk(&tapeq, dp);
692                 break;
693         case ALGO_SMALLEST:
694                 break;
695         case ALGO_LAST:
696                 dp = tapeq.tail;
697                 remove_disk(&tapeq, dp);
698                 break;
699         }
700         if(!dp) { /* ALGO_SMALLEST, or default if nothing fit. */
701             if(conf_taperalgo != ALGO_SMALLEST)  {
702                 fprintf(stderr,
703                    "driver: startaflush: Using SMALLEST because nothing fit\n");
704             }
705             fit = dp = tapeq.head;
706             while (fit != NULL) {
707                 if(sched(fit)->act_size < sched(dp)->act_size &&
708                    strcmp(sched(fit)->datestamp, datestamp) <= 0) {
709                     dp = fit;
710                 }
711                 fit = fit->next;
712             }
713             if(dp) remove_disk(&tapeq, dp);
714         }
715         if(taper_ev_read == NULL) {
716             taper_ev_read = event_register((event_id_t)taper, EV_READFD,
717                                            handle_taper_result, NULL);
718         }
719         if (dp) {
720             taper_disk = dp;
721             taper_busy = 1;
722             qname = quote_string(dp->name);
723             taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
724                       sched(dp)->datestamp);
725             fprintf(stderr,"driver: startaflush: %s %s %s "
726                     OFF_T_FMT " " OFF_T_FMT "\n",
727                     taperalgo2str(conf_taperalgo), dp->host->hostname, qname,
728                     (OFF_T_FMT_TYPE)sched(taper_disk)->act_size,
729                     (OFF_T_FMT_TYPE)tape_left);
730             if(sched(dp)->act_size <= tape_left)
731                 tape_left -= sched(dp)->act_size;
732             else
733                 tape_left = (off_t)0;
734             amfree(qname);
735         } else {
736             error("FATAL: Taper marked busy and no work found.");
737             /*NOTREACHED*/
738         }
739     } else if(!taper_busy && taper_ev_read != NULL) {
740         event_release(taper_ev_read);
741         taper_ev_read = NULL;
742     }
743 }
744
745
746 static int
747 client_constrained(
748     disk_t *    dp)
749 {
750     disk_t *dp2;
751
752     /* first, check if host is too busy */
753
754     if(dp->host->inprogress >= dp->host->maxdumps) {
755         return 1;
756     }
757
758     /* next, check conflict with other dumps on same spindle */
759
760     if(dp->spindle == -1) {     /* but spindle -1 never conflicts by def. */
761         return 0;
762     }
763
764     for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
765         if(dp2->inprogress && dp2->spindle == dp->spindle) {
766             return 1;
767         }
768
769     return 0;
770 }
771
772 static void
773 start_some_dumps(
774     disklist_t *        rq)
775 {
776     int cur_idle;
777     disk_t *diskp, *delayed_diskp, *diskp_accept;
778     assignedhd_t **holdp=NULL, **holdp_accept;
779     const time_t now = time(NULL);
780     cmd_t cmd;
781     int result_argc;
782     char *result_argv[MAX_ARGS+1];
783     chunker_t *chunker;
784     dumper_t *dumper;
785     char dumptype;
786     char *dumporder;
787
788     idle_reason = IDLE_NO_DUMPERS;
789     sleep_time = 0;
790
791     if(dumpers_ev_time != NULL) {
792         event_release(dumpers_ev_time);
793         dumpers_ev_time = NULL;
794     }
795
796     for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
797
798         if( dumper->busy ) {
799             continue;
800         }
801
802         if (dumper->ev_read != NULL) {
803             event_release(dumper->ev_read);
804             dumper->ev_read = NULL;
805         }
806
807         /*
808          * A potential problem with starting from the bottom of the dump time
809          * distribution is that a slave host will have both one of the shortest
810          * and one of the longest disks, so starting its shortest disk first will
811          * tie up the host and eliminate its longest disk from consideration the
812          * first pass through.  This could cause a big delay in starting that long
813          * disk, which could drag out the whole night's dumps.
814          *
815          * While starting from the top of the dump time distribution solves the
816          * above problem, this turns out to be a bad idea, because the big dumps
817          * will almost certainly pack the holding disk completely, leaving no
818          * room for even one small dump to start.  This ends up shutting out the
819          * small-end dumpers completely (they stay idle).
820          *
821          * The introduction of multiple simultaneous dumps to one host alleviates
822          * the biggest&smallest dumps problem: both can be started at the
823          * beginning.
824          */
825
826         diskp_accept = NULL;
827         holdp_accept = NULL;
828         delayed_diskp = NULL;
829
830         cur_idle = NOT_IDLE;
831
832         dumporder = getconf_str(CNF_DUMPORDER);
833         if(strlen(dumporder) > (size_t)(dumper-dmptable)) {
834             dumptype = dumporder[dumper-dmptable];
835         }
836         else {
837             if(dumper-dmptable < 3)
838                 dumptype = 't';
839             else
840                 dumptype = 'T';
841         }
842
843         for(diskp = rq->head; diskp != NULL; diskp = diskp->next) {
844             assert(diskp->host != NULL && sched(diskp) != NULL);
845
846             if (diskp->host->start_t > now) {
847                 cur_idle = max(cur_idle, IDLE_START_WAIT);
848                 if (delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
849                     delayed_diskp = diskp;
850                     sleep_time = diskp->host->start_t;
851                 }
852             } else if(diskp->start_t > now) {
853                 cur_idle = max(cur_idle, IDLE_START_WAIT);
854                 if (delayed_diskp == NULL || sleep_time > diskp->start_t) {
855                     delayed_diskp = diskp;
856                     sleep_time = diskp->start_t;
857                 }
858             } else if (diskp->host->netif->curusage > 0 &&
859                        sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
860                 cur_idle = max(cur_idle, IDLE_NO_BANDWIDTH);
861             } else if(sched(diskp)->no_space) {
862                 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
863             } else if (diskp->to_holdingdisk == HOLD_NEVER) {
864                 cur_idle = max(cur_idle, IDLE_NO_HOLD);
865             } else if ((holdp =
866                 find_diskspace(sched(diskp)->est_size, &cur_idle, NULL)) == NULL) {
867                 cur_idle = max(cur_idle, IDLE_NO_DISKSPACE);
868             } else if (client_constrained(diskp)) {
869                 free_assignedhd(holdp);
870                 cur_idle = max(cur_idle, IDLE_CLIENT_CONSTRAINED);
871             } else {
872
873                 /* disk fits, dump it */
874                 int accept = !diskp_accept;
875                 if(!accept) {
876                     switch(dumptype) {
877                       case 's': accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
878                                 break;
879                       case 'S': accept = (sched(diskp)->est_size > sched(diskp_accept)->est_size);
880                                 break;
881                       case 't': accept = (sched(diskp)->est_time < sched(diskp_accept)->est_time);
882                                 break;
883                       case 'T': accept = (sched(diskp)->est_time > sched(diskp_accept)->est_time);
884                                 break;
885                       case 'b': accept = (sched(diskp)->est_kps < sched(diskp_accept)->est_kps);
886                                 break;
887                       case 'B': accept = (sched(diskp)->est_kps > sched(diskp_accept)->est_kps);
888                                 break;
889                       default:  log_add(L_WARNING, "Unknown dumporder character \'%c\', using 's'.\n",
890                                         dumptype);
891                                 accept = (sched(diskp)->est_size < sched(diskp_accept)->est_size);
892                                 break;
893                     }
894                 }
895                 if(accept) {
896                     if( !diskp_accept || !degraded_mode || diskp->priority >= diskp_accept->priority) {
897                         if(holdp_accept) free_assignedhd(holdp_accept);
898                         diskp_accept = diskp;
899                         holdp_accept = holdp;
900                     }
901                     else {
902                         free_assignedhd(holdp);
903                     }
904                 }
905                 else {
906                     free_assignedhd(holdp);
907                 }
908             }
909         }
910
911         diskp = diskp_accept;
912         holdp = holdp_accept;
913
914         idle_reason = max(idle_reason, cur_idle);
915
916         /*
917          * If we have no disk at this point, and there are disks that
918          * are delayed, then schedule a time event to call this dumper
919          * with the disk with the shortest delay.
920          */
921         if (diskp == NULL && delayed_diskp != NULL) {
922             assert(sleep_time > now);
923             sleep_time -= now;
924             dumpers_ev_time = event_register((event_id_t)sleep_time, EV_TIME,
925                 handle_dumpers_time, &runq);
926             return;
927         } else if (diskp != NULL) {
928             sched(diskp)->act_size = (off_t)0;
929             allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
930             sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
931             amfree(holdp);
932             sched(diskp)->destname = newstralloc(sched(diskp)->destname,
933                                                  sched(diskp)->holdp[0]->destname);
934             diskp->host->inprogress++;  /* host is now busy */
935             diskp->inprogress = 1;
936             sched(diskp)->dumper = dumper;
937             sched(diskp)->timestamp = now;
938
939             dumper->busy = 1;           /* dumper is now busy */
940             dumper->dp = diskp;         /* link disk to dumper */
941             remove_disk(rq, diskp);             /* take it off the run queue */
942
943             sched(diskp)->origsize = (off_t)-1;
944             sched(diskp)->dumpsize = (off_t)-1;
945             sched(diskp)->dumptime = (time_t)0;
946             sched(diskp)->tapetime = (time_t)0;
947             chunker = dumper->chunker;
948             chunker->result = LAST_TOK;
949             dumper->result = LAST_TOK;
950             startup_chunk_process(chunker,chunker_program);
951             chunker_cmd(chunker, START, (void *)driver_timestamp);
952             chunker->dumper = dumper;
953             chunker_cmd(chunker, PORT_WRITE, diskp);
954             cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
955             if(cmd != PORT) {
956                 assignedhd_t **h=NULL;
957                 int activehd;
958
959                 printf("driver: did not get PORT from %s for %s:%s\n",
960                        chunker->name, diskp->host->hostname, diskp->name);
961                 fflush(stdout);
962
963                 deallocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
964                 h = sched(diskp)->holdp;
965                 activehd = sched(diskp)->activehd;
966                 h[activehd]->used = 0;
967                 holdalloc(h[activehd]->disk)->allocated_dumpers--;
968                 adjust_diskspace(diskp, DONE);
969                 delete_diskspace(diskp);
970                 diskp->host->inprogress--;
971                 diskp->inprogress = 0;
972                 sched(diskp)->dumper = NULL;
973                 dumper->busy = 0;
974                 dumper->dp = NULL;
975                 sched(diskp)->attempted++;
976                 free_serial_dp(diskp);
977                 if(sched(diskp)->attempted < 2)
978                     enqueue_disk(rq, diskp);
979             }
980             else {
981                 dumper->ev_read = event_register((event_id_t)dumper->fd, EV_READFD,
982                                                  handle_dumper_result, dumper);
983                 chunker->ev_read = event_register((event_id_t)chunker->fd, EV_READFD,
984                                                    handle_chunker_result, chunker);
985                 dumper->output_port = atoi(result_argv[2]);
986
987                 dumper_cmd(dumper, PORT_DUMP, diskp);
988             }
989             diskp->host->start_t = now + 15;
990         }
991     }
992 }
993
994 /*
995  * This gets called when a dumper is delayed for some reason.  It may
996  * be because a disk has a delayed start, or amanda is constrained
997  * by network or disk limits.
998  */
999
1000 static void
1001 handle_dumpers_time(
1002     void *      cookie)
1003 {
1004     disklist_t *runq = cookie;
1005     event_release(dumpers_ev_time);
1006     dumpers_ev_time = NULL; 
1007     start_some_dumps(runq);
1008 }
1009
1010 static void
1011 dump_schedule(
1012     disklist_t *qp,
1013     char *      str)
1014 {
1015     disk_t *dp;
1016     char *qname;
1017
1018     printf("dump of driver schedule %s:\n--------\n", str);
1019
1020     for(dp = qp->head; dp != NULL; dp = dp->next) {
1021         qname = quote_string(dp->name);
1022         printf("  %-20s %-25s lv %d t %5lu s " OFF_T_FMT " p %d\n",
1023                dp->host->hostname, qname, sched(dp)->level,
1024                sched(dp)->est_time,
1025                (OFF_T_FMT_TYPE)sched(dp)->est_size, sched(dp)->priority);
1026         amfree(qname);
1027     }
1028     printf("--------\n");
1029 }
1030
1031 static void
1032 start_degraded_mode(
1033     /*@keep@*/ disklist_t *queuep)
1034 {
1035     disk_t *dp;
1036     disklist_t newq;
1037     off_t est_full_size;
1038     char *qname;
1039
1040     if (taper_ev_read != NULL) {
1041         event_release(taper_ev_read);
1042         taper_ev_read = NULL;
1043     }
1044
1045     newq.head = newq.tail = 0;
1046
1047     dump_schedule(queuep, "before start degraded mode");
1048
1049     est_full_size = (off_t)0;
1050     while(!empty(*queuep)) {
1051         dp = dequeue_disk(queuep);
1052
1053         qname = quote_string(dp->name);
1054         if(sched(dp)->level != 0)
1055             /* go ahead and do the disk as-is */
1056             enqueue_disk(&newq, dp);
1057         else {
1058             if (reserved_space + est_full_size + sched(dp)->est_size
1059                 <= total_disksize) {
1060                 enqueue_disk(&newq, dp);
1061                 est_full_size += sched(dp)->est_size;
1062             }
1063             else if(sched(dp)->degr_level != -1) {
1064                 sched(dp)->level = sched(dp)->degr_level;
1065                 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
1066                 sched(dp)->est_nsize = sched(dp)->degr_nsize;
1067                 sched(dp)->est_csize = sched(dp)->degr_csize;
1068                 sched(dp)->est_time = sched(dp)->degr_time;
1069                 sched(dp)->est_kps  = sched(dp)->degr_kps;
1070                 enqueue_disk(&newq, dp);
1071             }
1072             else {
1073                 log_add(L_FAIL,"%s %s %s %d [can't switch to incremental dump]",
1074                         dp->host->hostname, qname, sched(dp)->datestamp,
1075                         sched(dp)->level);
1076             }
1077         }
1078         amfree(qname);
1079     }
1080
1081     /*@i@*/ *queuep = newq;
1082     degraded_mode = 1;
1083
1084     dump_schedule(queuep, "after start degraded mode");
1085 }
1086
1087
1088 static void
1089 continue_port_dumps(void)
1090 {
1091     disk_t *dp, *ndp;
1092     assignedhd_t **h;
1093     int active_dumpers=0, busy_dumpers=0, i;
1094     dumper_t *dumper;
1095
1096     /* First we try to grant diskspace to some dumps waiting for it. */
1097     for( dp = roomq.head; dp; dp = ndp ) {
1098         ndp = dp->next;
1099         /* find last holdingdisk used by this dump */
1100         for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ) {
1101             (void)h; /* Quiet lint */
1102         }
1103         /* find more space */
1104         h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1105                             &active_dumpers, h[i] );
1106         if( h ) {
1107             for(dumper = dmptable; dumper < dmptable + inparallel &&
1108                                    dumper->dp != dp; dumper++) {
1109                 (void)dp; /* Quiet lint */
1110             }
1111             assert( dumper < dmptable + inparallel );
1112             sched(dp)->activehd = assign_holdingdisk( h, dp );
1113             chunker_cmd( dumper->chunker, CONTINUE, dp );
1114             amfree(h);
1115             remove_disk( &roomq, dp );
1116         }
1117     }
1118
1119     /* So for some disks there is less holding diskspace available than
1120      * was asked for. Possible reasons are
1121      * a) diskspace has been allocated for other dumps which are
1122      *    still running or already being written to tape
1123      * b) all other dumps have been suspended due to lack of diskspace
1124      * c) this dump doesn't fit on all the holding disks
1125      * Case a) is not a problem. We just wait for the diskspace to
1126      * be freed by moving the current disk to a queue.
1127      * If case b) occurs, we have a deadlock situation. We select
1128      * a dump from the queue to be aborted and abort it. It will
1129      * be retried later dumping to disk.
1130      * If case c) is detected, the dump is aborted. Next time
1131      * it will be dumped directly to tape. Actually, case c is a special
1132      * manifestation of case b) where only one dumper is busy.
1133      */
1134     for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1135         if( dumper->busy ) {
1136             busy_dumpers++;
1137             if( !find_disk(&roomq, dumper->dp) ) {
1138                 active_dumpers++;
1139             } else if( !dp || 
1140                        sched(dp)->est_size > sched(dumper->dp)->est_size ) {
1141                 dp = dumper->dp;
1142             }
1143         }
1144     }
1145     if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) && 
1146         ((!taper_busy && empty(tapeq)) || degraded_mode) &&
1147         pending_aborts == 0 ) { /* not case a */
1148         if( busy_dumpers == 1 ) { /* case c */
1149             sched(dp)->no_space = 1;
1150         }
1151         /* case b */
1152         /* At this time, dp points to the dump with the smallest est_size.
1153          * We abort that dump, hopefully not wasting too much time retrying it.
1154          */
1155         remove_disk( &roomq, dp );
1156         chunker_cmd( sched(dp)->dumper->chunker, ABORT, NULL);
1157         dumper_cmd( sched(dp)->dumper, ABORT, NULL );
1158         pending_aborts++;
1159     }
1160 }
1161
1162
1163 static void
1164 handle_taper_result(
1165     void *      cookie)
1166 {
1167     disk_t *dp;
1168     off_t filenum;
1169     cmd_t cmd;
1170     int result_argc;
1171     char *result_argv[MAX_ARGS+1];
1172     int avail_tapes = 0;
1173     
1174     (void)cookie;       /* Quiet unused parameter warning */
1175
1176     assert(cookie == NULL);
1177     
1178     do {
1179         
1180         short_dump_state();
1181         
1182         cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
1183         
1184         switch(cmd) {
1185             
1186         case PARTIAL:
1187         case DONE:      /* DONE <handle> <label> <tape file> <err mess> */
1188             if(result_argc != 5) {
1189                 error("error: [taper DONE result_argc != 5: %d", result_argc);
1190                 /*NOTREACHED*/
1191             }
1192             
1193             dp = serial2disk(result_argv[2]);
1194             free_serial(result_argv[2]);
1195             
1196             filenum = OFF_T_ATOI(result_argv[4]);
1197             if(cmd == DONE) {
1198                 update_info_taper(dp, result_argv[3], filenum,
1199                                   sched(dp)->level);
1200             }
1201             
1202             delete_diskspace(dp);
1203             
1204             printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1205                    walltime_str(curclock()), dp->host->hostname, dp->name);
1206             fflush(stdout);
1207             
1208             amfree(sched(dp)->destname);
1209             amfree(sched(dp)->dumpdate);
1210             amfree(sched(dp)->degr_dumpdate);
1211             amfree(sched(dp)->datestamp);
1212             amfree(dp->up);
1213             
1214             taper_busy = 0;
1215             taper_disk = NULL;
1216             startaflush();
1217             
1218             /* continue with those dumps waiting for diskspace */
1219             continue_port_dumps();
1220             break;
1221             
1222         case TRYAGAIN:  /* TRY-AGAIN <handle> <err mess> */
1223             if (result_argc < 2) {
1224                 error("error [taper TRYAGAIN result_argc < 2: %d]",
1225                       result_argc);
1226                 /*NOTREACHED*/
1227             }
1228             dp = serial2disk(result_argv[2]);
1229             free_serial(result_argv[2]);
1230             printf("driver: taper-tryagain time %s disk %s:%s\n",
1231                    walltime_str(curclock()), dp->host->hostname, dp->name);
1232             fflush(stdout);
1233             
1234             /* See how many tapes we have left, but we alwyays
1235                retry once (why?) */
1236             current_tape++;
1237             if(dp->tape_splitsize > (off_t)0)
1238                 avail_tapes = conf_runtapes - current_tape;
1239             else
1240                 avail_tapes = 0;
1241             
1242             if(sched(dp)->attempted > avail_tapes) {
1243                 log_add(L_FAIL, "%s %s %s %d [too many taper retries]",
1244                         dp->host->hostname, dp->name, sched(dp)->datestamp,
1245                         sched(dp)->level);
1246                 printf("driver: taper failed %s %s %s, too many taper retry\n",
1247                        result_argv[2], dp->host->hostname, dp->name);
1248             }
1249             else {
1250                 /* Re-insert into taper queue. */
1251                 sched(dp)->attempted++;
1252                 headqueue_disk(&tapeq, dp);
1253             }
1254             
1255             tape_left = tape_length;
1256             
1257             /* run next thing from queue */
1258             
1259             taper_busy = 0;
1260             taper_disk = NULL;
1261             startaflush();
1262             continue_port_dumps();
1263             break;
1264             
1265         case SPLIT_CONTINUE:  /* SPLIT_CONTINUE <handle> <new_label> */
1266             if (result_argc != 3) {
1267                 error("error [taper SPLIT_CONTINUE result_argc != 3: %d]",
1268                       result_argc);
1269                 /*NOTREACHED*/
1270             }
1271             
1272             break;
1273         case SPLIT_NEEDNEXT:  /* SPLIT-NEEDNEXT <handle> <kb written> */
1274             if (result_argc != 3) {
1275                 error("error [taper SPLIT_NEEDNEXT result_argc != 3: %d]",
1276                       result_argc);
1277                 /*NOTREACHED*/
1278             }
1279             
1280             /* Update our tape counter and reset tape_left */
1281             current_tape++;
1282             tape_left = tape_length;
1283             
1284             /* Reduce the size of the dump by amount written and reduce
1285                tape_left by the amount left over */
1286             dp = serial2disk(result_argv[2]);
1287             sched(dp)->act_size -= OFF_T_ATOI(result_argv[3]);
1288             if (sched(dp)->act_size < tape_left)
1289                 tape_left -= sched(dp)->act_size;
1290             else
1291                 tape_length = 0;
1292             
1293             break;
1294             
1295         case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
1296             dp = serial2disk(result_argv[2]);
1297             free_serial(result_argv[2]);
1298             printf("driver: finished-cmd time %s taper wrote %s:%s\n",
1299                    walltime_str(curclock()), dp->host->hostname, dp->name);
1300             fflush(stdout);
1301             log_add(L_WARNING, "Taper  error: %s", result_argv[3]);
1302             /*FALLTHROUGH*/
1303
1304         case BOGUS:
1305             if (cmd == BOGUS) {
1306                 log_add(L_WARNING, "Taper protocol error");
1307             }
1308             /*
1309              * Since we received a taper error, we can't send anything more
1310              * to the taper.  Go into degraded mode to try to get everthing
1311              * onto disk.  Later, these dumps can be flushed to a new tape.
1312              * The tape queue is zapped so that it appears empty in future
1313              * checks. If there are dumps waiting for diskspace to be freed,
1314              * cancel one.
1315              */
1316             if(!nodump) {
1317                 log_add(L_WARNING,
1318                         "going into degraded mode because of taper component error.");
1319                 start_degraded_mode(&runq);
1320             }
1321             tapeq.head = tapeq.tail = NULL;
1322             taper_busy = 0;
1323             taper_disk = NULL;
1324             if(taper_ev_read != NULL) {
1325                 event_release(taper_ev_read);
1326                 taper_ev_read = NULL;
1327             }
1328             if(cmd != TAPE_ERROR) aclose(taper);
1329             continue_port_dumps();
1330             break;
1331
1332         default:
1333             error("driver received unexpected token (%s) from taper",
1334                   cmdstr[cmd]);
1335             /*NOTREACHED*/
1336         }
1337         /*
1338          * Wakeup any dumpers that are sleeping because of network
1339          * or disk constraints.
1340          */
1341         start_some_dumps(&runq);
1342         
1343     } while(areads_dataready(taper));
1344 }
1345
1346 static dumper_t *
1347 idle_dumper(void)
1348 {
1349     dumper_t *dumper;
1350
1351     for(dumper = dmptable; dumper < dmptable+inparallel; dumper++)
1352         if(!dumper->busy && !dumper->down) return dumper;
1353
1354     return NULL;
1355 }
1356
1357 static void
1358 dumper_result(
1359     disk_t *    dp)
1360 {
1361     dumper_t *dumper;
1362     chunker_t *chunker;
1363     assignedhd_t **h=NULL;
1364     int activehd, i;
1365     off_t dummy;
1366     off_t size;
1367     int is_partial;
1368
1369     dumper = sched(dp)->dumper;
1370     chunker = dumper->chunker;
1371
1372     free_serial_dp(dp);
1373
1374     h = sched(dp)->holdp;
1375     activehd = sched(dp)->activehd;
1376
1377     if(dumper->result == DONE && chunker->result == DONE) {
1378         update_info_dumper(dp, sched(dp)->origsize,
1379                            sched(dp)->dumpsize, sched(dp)->dumptime);
1380         log_add(L_STATS, "estimate %s %s %s %d [sec %ld nkb " OFF_T_FMT
1381                 " ckb " OFF_T_FMT " kps %d]",
1382                 dp->host->hostname, dp->name, sched(dp)->datestamp,
1383                 sched(dp)->level,
1384                 sched(dp)->est_time, (OFF_T_FMT_TYPE)sched(dp)->est_nsize, 
1385                 (OFF_T_FMT_TYPE)sched(dp)->est_csize,
1386                 sched(dp)->est_kps);
1387     }
1388
1389     deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
1390
1391     is_partial = dumper->result != DONE || chunker->result != DONE;
1392     rename_tmp_holding(sched(dp)->destname, !is_partial);
1393
1394     dummy = (off_t)0;
1395     for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
1396         dummy += h[i]->used;
1397     }
1398
1399     size = size_holding_files(sched(dp)->destname, 0);
1400     h[activehd]->used = size - dummy;
1401     holdalloc(h[activehd]->disk)->allocated_dumpers--;
1402     adjust_diskspace(dp, DONE);
1403
1404     sched(dp)->attempted += 1;
1405
1406     if((dumper->result != DONE || chunker->result != DONE) &&
1407        sched(dp)->attempted <= 1) {
1408         delete_diskspace(dp);
1409         enqueue_disk(&runq, dp);
1410     }
1411     else if(size > (off_t)DISK_BLOCK_KB) {
1412         sched(dp)->attempted = 0;
1413         enqueue_disk(&tapeq, dp);
1414         startaflush();
1415     }
1416     else {
1417         delete_diskspace(dp);
1418     }
1419
1420     dumper->busy = 0;
1421     dp->host->inprogress -= 1;
1422     dp->inprogress = 0;
1423
1424     waitpid(chunker->pid, NULL, 0 );
1425     aclose(chunker->fd);
1426     chunker->fd = -1;
1427     chunker->down = 1;
1428     
1429     dp = NULL;
1430     if (chunker->result == ABORT_FINISHED)
1431         pending_aborts--;
1432     continue_port_dumps();
1433     /*
1434      * Wakeup any dumpers that are sleeping because of network
1435      * or disk constraints.
1436      */
1437     start_some_dumps(&runq);
1438 }
1439
1440
1441 static void
1442 handle_dumper_result(
1443     void *      cookie)
1444 {
1445     /*static int pending_aborts = 0;*/
1446     dumper_t *dumper = cookie;
1447     disk_t *dp, *sdp;
1448     cmd_t cmd;
1449     int result_argc;
1450     char *qname;
1451     char *result_argv[MAX_ARGS+1];
1452
1453     assert(dumper != NULL);
1454     dp = dumper->dp;
1455     assert(dp != NULL && sched(dp) != NULL);
1456
1457     do {
1458
1459         short_dump_state();
1460
1461         cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1462
1463         if(cmd != BOGUS) {
1464             /* result_argv[2] always contains the serial number */
1465             sdp = serial2disk(result_argv[2]);
1466             if (sdp != dp) {
1467                 error("%s: Invalid serial number", get_pname(), result_argv[2]);
1468                 /*NOTREACHED*/
1469             }
1470         }
1471
1472         qname = quote_string(dp->name);
1473         switch(cmd) {
1474
1475         case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
1476             if(result_argc != 6) {
1477                 error("error [dumper DONE result_argc != 6: %d]", result_argc);
1478                 /*NOTREACHED*/
1479             }
1480
1481             /*free_serial(result_argv[2]);*/
1482
1483             sched(dp)->origsize = OFF_T_ATOI(result_argv[3]);
1484             sched(dp)->dumptime = TIME_T_ATOI(result_argv[5]);
1485
1486             printf("driver: finished-cmd time %s %s dumped %s:%s\n",
1487                    walltime_str(curclock()), dumper->name,
1488                    dp->host->hostname, qname);
1489             fflush(stdout);
1490
1491             dumper->result = cmd;
1492
1493             break;
1494
1495         case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1496             /*
1497              * Requeue this disk, and fall through to the FAILED
1498              * case for cleanup.
1499              */
1500             if(sched(dp)->attempted) {
1501                 log_add(L_FAIL, "%s %s %s %d [too many dumper retry: %s]",
1502                     dp->host->hostname, dp->name, sched(dp)->datestamp,
1503                     sched(dp)->level, result_argv[3]);
1504                 printf("driver: dump failed %s %s %s, too many dumper retry: %s\n",
1505                         result_argv[2], dp->host->hostname, dp->name,
1506                         result_argv[3]);
1507             }
1508             /* FALLTHROUGH */
1509         case FAILED: /* FAILED <handle> <errstr> */
1510             /*free_serial(result_argv[2]);*/
1511             dumper->result = cmd;
1512             break;
1513
1514         case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1515             /*
1516              * We sent an ABORT from the NO-ROOM case because this dump
1517              * wasn't going to fit onto the holding disk.  We now need to
1518              * clean up the remains of this image, and try to finish
1519              * other dumps that are waiting on disk space.
1520              */
1521             assert(pending_aborts);
1522             /*free_serial(result_argv[2]);*/
1523             dumper->result = cmd;
1524             break;
1525
1526         case BOGUS:
1527             /* either EOF or garbage from dumper.  Turn it off */
1528             log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1529                     dumper->name, (long)dumper->pid);
1530             if (dumper->ev_read) {
1531                 event_release(dumper->ev_read);
1532                 dumper->ev_read = NULL;
1533             }
1534             aclose(dumper->fd);
1535             dumper->busy = 0;
1536             dumper->down = 1;   /* mark it down so it isn't used again */
1537             if(dp) {
1538                 /* if it was dumping something, zap it and try again */
1539                 if(sched(dp)->attempted) {
1540                 log_add(L_FAIL, "%s %s %s %d [%s died]",
1541                         dp->host->hostname, qname, sched(dp)->datestamp,
1542                         sched(dp)->level, dumper->name);
1543                 }
1544                 else {
1545                 log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1546                         dumper->name, dp->host->hostname, qname,
1547                         sched(dp)->level);
1548                 }
1549             }
1550             dumper->result = cmd;
1551             break;
1552
1553         default:
1554             assert(0);
1555         }
1556         amfree(qname);
1557
1558         /* send the dumper result to the chunker */
1559         if(dumper->chunker->down == 0 && dumper->chunker->fd != -1 &&
1560            dumper->chunker->result == LAST_TOK) {
1561             if(cmd == DONE) {
1562                 chunker_cmd(dumper->chunker, DONE, dp);
1563             }
1564             else {
1565                 chunker_cmd(dumper->chunker, FAILED, dp);
1566             }
1567         }
1568
1569         if(dumper->result != LAST_TOK && dumper->chunker->result != LAST_TOK)
1570             dumper_result(dp);
1571
1572     } while(areads_dataready(dumper->fd));
1573 }
1574
1575
1576 static void
1577 handle_chunker_result(
1578     void *      cookie)
1579 {
1580     /*static int pending_aborts = 0;*/
1581     chunker_t *chunker = cookie;
1582     assignedhd_t **h=NULL;
1583     dumper_t *dumper;
1584     disk_t *dp, *sdp;
1585     cmd_t cmd;
1586     int result_argc;
1587     char *result_argv[MAX_ARGS+1];
1588     int dummy;
1589     int activehd = -1;
1590     char *qname;
1591
1592     assert(chunker != NULL);
1593     dumper = chunker->dumper;
1594     assert(dumper != NULL);
1595     dp = dumper->dp;
1596     assert(dp != NULL);
1597     assert(sched(dp) != NULL);
1598     assert(sched(dp)->destname != NULL);
1599     assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
1600
1601     if(dp && sched(dp) && sched(dp)->holdp) {
1602         h = sched(dp)->holdp;
1603         activehd = sched(dp)->activehd;
1604     }
1605
1606     do {
1607
1608         short_dump_state();
1609
1610         cmd = getresult(chunker->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
1611
1612         if(cmd != BOGUS) {
1613             /* result_argv[2] always contains the serial number */
1614             sdp = serial2disk(result_argv[2]);
1615             if (sdp != dp) {
1616                 error("%s: Invalid serial number", get_pname(), result_argv[2]);
1617                 /*NOTREACHED*/
1618             }
1619         }
1620
1621         switch(cmd) {
1622
1623         case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
1624         case DONE: /* DONE <handle> <dumpsize> <errstr> */
1625             if(result_argc != 4) {
1626                 error("error [chunker %s result_argc != 4: %d]", cmdstr[cmd],
1627                       result_argc);
1628                 /*NOTREACHED*/
1629             }
1630             /*free_serial(result_argv[2]);*/
1631
1632             sched(dp)->dumpsize = (off_t)atof(result_argv[3]);
1633
1634             qname = quote_string(dp->name);
1635             printf("driver: finished-cmd time %s %s chunked %s:%s\n",
1636                    walltime_str(curclock()), chunker->name,
1637                    dp->host->hostname, qname);
1638             fflush(stdout);
1639             amfree(qname);
1640
1641             event_release(chunker->ev_read);
1642
1643             chunker->result = cmd;
1644
1645             break;
1646
1647         case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
1648             event_release(chunker->ev_read);
1649
1650             chunker->result = cmd;
1651
1652             break;
1653         case FAILED: /* FAILED <handle> <errstr> */
1654             /*free_serial(result_argv[2]);*/
1655
1656             event_release(chunker->ev_read);
1657
1658             chunker->result = cmd;
1659
1660             break;
1661
1662         case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
1663             if (!h || activehd < 0) { /* should never happen */
1664                 error("!h || activehd < 0");
1665                 /*NOTREACHED*/
1666             }
1667             h[activehd]->used -= OFF_T_ATOI(result_argv[3]);
1668             h[activehd]->reserved -= OFF_T_ATOI(result_argv[3]);
1669             holdalloc(h[activehd]->disk)->allocated_space -= OFF_T_ATOI(result_argv[3]);
1670             h[activehd]->disk->disksize -= OFF_T_ATOI(result_argv[3]);
1671             break;
1672
1673         case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
1674             if (!h || activehd < 0) { /* should never happen */
1675                 error("!h || activehd < 0");
1676                 /*NOTREACHED*/
1677             }
1678             holdalloc(h[activehd]->disk)->allocated_dumpers--;
1679             h[activehd]->used = h[activehd]->reserved;
1680             if( h[++activehd] ) { /* There's still some allocated space left.
1681                                    * Tell the dumper about it. */
1682                 sched(dp)->activehd++;
1683                 chunker_cmd( chunker, CONTINUE, dp );
1684             } else { /* !h[++activehd] - must allocate more space */
1685                 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
1686                 sched(dp)->est_size = (sched(dp)->act_size/(off_t)20) * (off_t)21; /* +5% */
1687                 sched(dp)->est_size = am_round(sched(dp)->est_size, (off_t)DISK_BLOCK_KB);
1688                 if (sched(dp)->est_size < sched(dp)->act_size + 2*DISK_BLOCK_KB)
1689                     sched(dp)->est_size += 2 * DISK_BLOCK_KB;
1690                 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1691                                     &dummy,
1692                                     h[activehd-1] );
1693                 if( !h ) {
1694                     /* No diskspace available. The reason for this will be
1695                      * determined in continue_port_dumps(). */
1696                     enqueue_disk( &roomq, dp );
1697                     continue_port_dumps();
1698                 } else {
1699                     /* OK, allocate space for disk and have chunker continue */
1700                     sched(dp)->activehd = assign_holdingdisk( h, dp );
1701                     chunker_cmd( chunker, CONTINUE, dp );
1702                     amfree(h);
1703                 }
1704             }
1705             break;
1706
1707         case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
1708             /*
1709              * We sent an ABORT from the NO-ROOM case because this dump
1710              * wasn't going to fit onto the holding disk.  We now need to
1711              * clean up the remains of this image, and try to finish
1712              * other dumps that are waiting on disk space.
1713              */
1714             /*assert(pending_aborts);*/
1715
1716             /*free_serial(result_argv[2]);*/
1717
1718             event_release(chunker->ev_read);
1719
1720             chunker->result = cmd;
1721
1722             break;
1723
1724         case BOGUS:
1725             /* either EOF or garbage from chunker.  Turn it off */
1726             log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
1727                     chunker->name, (long)chunker->pid);
1728
1729             if(dp) {
1730                 /* if it was dumping something, zap it and try again */
1731                 if (!h || activehd < 0) { /* should never happen */
1732                     error("!h || activehd < 0");
1733                     /*NOTREACHED*/
1734                 }
1735                 qname = quote_string(dp->name);
1736                 if(sched(dp)->attempted) {
1737                     log_add(L_FAIL, "%s %s %s %d [%s died]",
1738                             dp->host->hostname, qname, sched(dp)->datestamp,
1739                             sched(dp)->level, chunker->name);
1740                 }
1741                 else {
1742                     log_add(L_WARNING, "%s died while dumping %s:%s lev %d.",
1743                             chunker->name, dp->host->hostname, qname,
1744                             sched(dp)->level);
1745                 }
1746                 amfree(qname);
1747                 dp = NULL;
1748             }
1749
1750             event_release(chunker->ev_read);
1751
1752             chunker->result = cmd;
1753
1754             break;
1755
1756         default:
1757             assert(0);
1758         }
1759
1760         if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
1761             dumper_result(dp);
1762
1763     } while(areads_dataready(chunker->fd));
1764 }
1765
1766
1767 static disklist_t
1768 read_flush(void)
1769 {
1770     sched_t *sp;
1771     disk_t *dp;
1772     int line;
1773     dumpfile_t file;
1774     char *hostname, *diskname, *datestamp;
1775     int level;
1776     char *destname;
1777     disk_t *dp1;
1778     char *inpline = NULL;
1779     char *command;
1780     char *s;
1781     int ch;
1782     disklist_t tq;
1783     char *qname = NULL;
1784
1785     tq.head = tq.tail = NULL;
1786
1787     for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1788         line++;
1789         if (inpline[0] == '\0')
1790             continue;
1791
1792         s = inpline;
1793         ch = *s++;
1794
1795         skip_whitespace(s, ch);                 /* find the command */
1796         if(ch == '\0') {
1797             error("flush line %d: syntax error (no command)", line);
1798             /*NOTREACHED*/
1799         }
1800         command = s - 1;
1801         skip_non_whitespace(s, ch);
1802         s[-1] = '\0';
1803
1804         if(strcmp(command,"ENDFLUSH") == 0) {
1805             break;
1806         }
1807
1808         if(strcmp(command,"FLUSH") != 0) {
1809             error("flush line %d: syntax error (%s != FLUSH)", line, command);
1810             /*NOTREACHED*/
1811         }
1812
1813         skip_whitespace(s, ch);                 /* find the hostname */
1814         if(ch == '\0') {
1815             error("flush line %d: syntax error (no hostname)", line);
1816             /*NOTREACHED*/
1817         }
1818         hostname = s - 1;
1819         skip_non_whitespace(s, ch);
1820         s[-1] = '\0';
1821
1822         skip_whitespace(s, ch);                 /* find the diskname */
1823         if(ch == '\0') {
1824             error("flush line %d: syntax error (no diskname)", line);
1825             /*NOTREACHED*/
1826         }
1827         qname = s - 1;
1828         skip_quoted_string(s, ch);
1829         s[-1] = '\0';                           /* terminate the disk name */
1830         diskname = unquote_string(qname);
1831
1832         skip_whitespace(s, ch);                 /* find the datestamp */
1833         if(ch == '\0') {
1834             error("flush line %d: syntax error (no datestamp)", line);
1835             /*NOTREACHED*/
1836         }
1837         datestamp = s - 1;
1838         skip_non_whitespace(s, ch);
1839         s[-1] = '\0';
1840
1841         skip_whitespace(s, ch);                 /* find the level number */
1842         if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
1843             error("flush line %d: syntax error (bad level)", line);
1844             /*NOTREACHED*/
1845         }
1846         skip_integer(s, ch);
1847
1848         skip_whitespace(s, ch);                 /* find the filename */
1849         if(ch == '\0') {
1850             error("flush line %d: syntax error (no filename)", line);
1851             /*NOTREACHED*/
1852         }
1853         destname = s - 1;
1854         skip_non_whitespace(s, ch);
1855         s[-1] = '\0';
1856
1857         get_dumpfile(destname, &file);
1858         if( file.type != F_DUMPFILE) {
1859             if( file.type != F_CONT_DUMPFILE )
1860                 log_add(L_INFO, "%s: ignoring cruft file.", destname);
1861             amfree(diskname);
1862             continue;
1863         }
1864
1865         if(strcmp(hostname, file.name) != 0 ||
1866            strcmp(diskname, file.disk) != 0 ||
1867            strcmp(datestamp, file.datestamp) != 0) {
1868             log_add(L_INFO, "disk %s:%s not consistent with file %s",
1869                     hostname, diskname, destname);
1870             amfree(diskname);
1871             continue;
1872         }
1873         amfree(diskname);
1874
1875         dp = lookup_disk(file.name, file.disk);
1876
1877         if (dp == NULL) {
1878             log_add(L_INFO, "%s: disk %s:%s not in database, skipping it.",
1879                     destname, file.name, file.disk);
1880             continue;
1881         }
1882
1883         if(file.dumplevel < 0 || file.dumplevel > 9) {
1884             log_add(L_INFO, "%s: ignoring file with bogus dump level %d.",
1885                     destname, file.dumplevel);
1886             continue;
1887         }
1888
1889         dp1 = (disk_t *)alloc(SIZEOF(disk_t));
1890         *dp1 = *dp;
1891         dp1->next = dp1->prev = NULL;
1892
1893         /* add it to the flushhost list */
1894         if(!flushhost) {
1895             flushhost = alloc(SIZEOF(am_host_t));
1896             flushhost->next = NULL;
1897             flushhost->hostname = stralloc("FLUSHHOST");
1898             flushhost->up = NULL;
1899             flushhost->features = NULL;
1900         }
1901         dp1->hostnext = flushhost->disks;
1902         flushhost->disks = dp1;
1903
1904         sp = (sched_t *) alloc(SIZEOF(sched_t));
1905         sp->destname = stralloc(destname);
1906         sp->level = file.dumplevel;
1907         sp->dumpdate = NULL;
1908         sp->degr_dumpdate = NULL;
1909         sp->datestamp = stralloc(file.datestamp);
1910         sp->est_nsize = (off_t)0;
1911         sp->est_csize = (off_t)0;
1912         sp->est_time = 0;
1913         sp->est_kps = 10;
1914         sp->priority = 0;
1915         sp->degr_level = -1;
1916         sp->attempted = 0;
1917         sp->act_size = size_holding_files(destname, 0);
1918         sp->holdp = build_diskspace(destname);
1919         if(sp->holdp == NULL) continue;
1920         sp->dumper = NULL;
1921         sp->timestamp = (time_t)0;
1922
1923         dp1->up = (char *)sp;
1924
1925         enqueue_disk(&tq, dp1);
1926     }
1927     amfree(inpline);
1928
1929     /*@i@*/ return tq;
1930 }
1931
1932 static void
1933 read_schedule(
1934     void *      cookie)
1935 {
1936     sched_t *sp;
1937     disk_t *dp;
1938     int level, line, priority;
1939     char *dumpdate, *degr_dumpdate;
1940     int degr_level;
1941     time_t time, degr_time;
1942     time_t *time_p = &time;
1943     time_t *degr_time_p = &degr_time;
1944     off_t nsize, csize, degr_nsize, degr_csize;
1945     unsigned long kps, degr_kps;
1946     char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
1947     char *command;
1948     char *s;
1949     int ch;
1950     off_t flush_size = (off_t)0;
1951     char *qname = NULL;
1952
1953     (void)cookie;       /* Quiet unused parameter warning */
1954
1955     event_release(schedule_ev_read);
1956
1957     /* read schedule from stdin */
1958
1959     for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
1960         if (inpline[0] == '\0')
1961             continue;
1962         line++;
1963
1964         s = inpline;
1965         ch = *s++;
1966
1967         skip_whitespace(s, ch);                 /* find the command */
1968         if(ch == '\0') {
1969             error("schedule line %d: syntax error (no command)", line);
1970             /*NOTREACHED*/
1971         }
1972         command = s - 1;
1973         skip_non_whitespace(s, ch);
1974         s[-1] = '\0';
1975
1976         if(strcmp(command,"DUMP") != 0) {
1977             error("schedule line %d: syntax error (%s != DUMP)", line, command);
1978             /*NOTREACHED*/
1979         }
1980
1981         skip_whitespace(s, ch);                 /* find the host name */
1982         if(ch == '\0') {
1983             error("schedule line %d: syntax error (no host name)", line);
1984             /*NOTREACHED*/
1985         }
1986         hostname = s - 1;
1987         skip_non_whitespace(s, ch);
1988         s[-1] = '\0';
1989
1990         skip_whitespace(s, ch);                 /* find the feature list */
1991         if(ch == '\0') {
1992             error("schedule line %d: syntax error (no feature list)", line);
1993             /*NOTREACHED*/
1994         }
1995         features = s - 1;
1996         skip_non_whitespace(s, ch);
1997         s[-1] = '\0';
1998
1999         skip_whitespace(s, ch);                 /* find the disk name */
2000         if(ch == '\0') {
2001             error("schedule line %d: syntax error (no disk name)", line);
2002             /*NOTREACHED*/
2003         }
2004         qname = s - 1;
2005         skip_quoted_string(s, ch);
2006         s[-1] = '\0';                           /* terminate the disk name */
2007         diskname = unquote_string(qname);
2008
2009         skip_whitespace(s, ch);                 /* find the datestamp */
2010         if(ch == '\0') {
2011             error("schedule line %d: syntax error (no datestamp)", line);
2012             /*NOTREACHED*/
2013         }
2014         datestamp = s - 1;
2015         skip_non_whitespace(s, ch);
2016         s[-1] = '\0';
2017
2018         skip_whitespace(s, ch);                 /* find the priority number */
2019         if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
2020             error("schedule line %d: syntax error (bad priority)", line);
2021             /*NOTREACHED*/
2022         }
2023         skip_integer(s, ch);
2024
2025         skip_whitespace(s, ch);                 /* find the level number */
2026         if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2027             error("schedule line %d: syntax error (bad level)", line);
2028             /*NOTREACHED*/
2029         }
2030         skip_integer(s, ch);
2031
2032         skip_whitespace(s, ch);                 /* find the dump date */
2033         if(ch == '\0') {
2034             error("schedule line %d: syntax error (bad dump date)", line);
2035             /*NOTREACHED*/
2036         }
2037         dumpdate = s - 1;
2038         skip_non_whitespace(s, ch);
2039         s[-1] = '\0';
2040
2041         skip_whitespace(s, ch);                 /* find the native size */
2042         if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, 
2043                                 (OFF_T_FMT_TYPE *)&nsize) != 1) {
2044             error("schedule line %d: syntax error (bad nsize)", line);
2045             /*NOTREACHED*/
2046         }
2047         skip_integer(s, ch);
2048
2049         skip_whitespace(s, ch);                 /* find the compressed size */
2050         if(ch == '\0' || sscanf(s - 1, OFF_T_FMT, 
2051                                 (OFF_T_FMT_TYPE *)&csize) != 1) {
2052             error("schedule line %d: syntax error (bad csize)", line);
2053             /*NOTREACHED*/
2054         }
2055         skip_integer(s, ch);
2056
2057         skip_whitespace(s, ch);                 /* find the time number */
2058         if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2059                                 (TIME_T_FMT_TYPE *)time_p) != 1) {
2060             error("schedule line %d: syntax error (bad estimated time)", line);
2061             /*NOTREACHED*/
2062         }
2063         skip_integer(s, ch);
2064
2065         skip_whitespace(s, ch);                 /* find the kps number */
2066         if(ch == '\0' || sscanf(s - 1, "%lu", &kps) != 1) {
2067             error("schedule line %d: syntax error (bad kps)", line);
2068             continue;
2069         }
2070         skip_integer(s, ch);
2071
2072         degr_dumpdate = NULL;                   /* flag if degr fields found */
2073         skip_whitespace(s, ch);                 /* find the degr level number */
2074         if(ch != '\0') {
2075             if(sscanf(s - 1, "%d", &degr_level) != 1) {
2076                 error("schedule line %d: syntax error (bad degr level)", line);
2077                 /*NOTREACHED*/
2078             }
2079             skip_integer(s, ch);
2080
2081             skip_whitespace(s, ch);             /* find the degr dump date */
2082             if(ch == '\0') {
2083                 error("schedule line %d: syntax error (bad degr dump date)", line);
2084                 /*NOTREACHED*/
2085             }
2086             degr_dumpdate = s - 1;
2087             skip_non_whitespace(s, ch);
2088             s[-1] = '\0';
2089
2090             skip_whitespace(s, ch);             /* find the degr native size */
2091             if(ch == '\0'  || sscanf(s - 1, OFF_T_FMT, 
2092                         (OFF_T_FMT_TYPE *)&degr_nsize) != 1) {
2093                 error("schedule line %d: syntax error (bad degr nsize)", line);
2094                 /*NOTREACHED*/
2095             }
2096             skip_integer(s, ch);
2097
2098             skip_whitespace(s, ch);             /* find the degr compressed size */
2099             if(ch == '\0'  || sscanf(s - 1, OFF_T_FMT, 
2100                         (OFF_T_FMT_TYPE *)&degr_csize) != 1) {
2101                 error("schedule line %d: syntax error (bad degr csize)", line);
2102                 /*NOTREACHED*/
2103             }
2104             skip_integer(s, ch);
2105
2106             skip_whitespace(s, ch);             /* find the degr time number */
2107             if(ch == '\0' || sscanf(s - 1, TIME_T_FMT,
2108                                 (TIME_T_FMT_TYPE *)degr_time_p) != 1) {
2109                 error("schedule line %d: syntax error (bad degr estimated time)", line);
2110                 /*NOTREACHED*/
2111             }
2112             skip_integer(s, ch);
2113
2114             skip_whitespace(s, ch);             /* find the degr kps number */
2115             if(ch == '\0' || sscanf(s - 1, "%lu", &degr_kps) != 1) {
2116                 error("schedule line %d: syntax error (bad degr kps)", line);
2117                 /*NOTREACHED*/
2118             }
2119             skip_integer(s, ch);
2120         }
2121
2122         dp = lookup_disk(hostname, diskname);
2123         if(dp == NULL) {
2124             log_add(L_WARNING,
2125                     "schedule line %d: %s:'%s' not in disklist, ignored",
2126                     line, hostname, qname);
2127             amfree(diskname);
2128             continue;
2129         }
2130
2131         sp = (sched_t *) alloc(SIZEOF(sched_t));
2132         /*@ignore@*/
2133         sp->level = level;
2134         sp->dumpdate = stralloc(dumpdate);
2135         sp->est_nsize = DISK_BLOCK_KB + nsize; /* include header */
2136         sp->est_csize = DISK_BLOCK_KB + csize; /* include header */
2137         /* round estimate to next multiple of DISK_BLOCK_KB */
2138         sp->est_csize = am_round(sp->est_csize, DISK_BLOCK_KB);
2139         sp->est_size = sp->est_csize;
2140         sp->est_time = time;
2141         sp->est_kps = kps;
2142         sp->priority = priority;
2143         sp->datestamp = stralloc(datestamp);
2144
2145         if(degr_dumpdate) {
2146             sp->degr_level = degr_level;
2147             sp->degr_dumpdate = stralloc(degr_dumpdate);
2148             sp->degr_nsize = DISK_BLOCK_KB + degr_nsize;
2149             sp->degr_csize = DISK_BLOCK_KB + degr_csize;
2150             /* round estimate to next multiple of DISK_BLOCK_KB */
2151             sp->degr_csize = am_round(sp->degr_csize, DISK_BLOCK_KB);
2152             sp->degr_time = degr_time;
2153             sp->degr_kps = degr_kps;
2154         } else {
2155             sp->degr_level = -1;
2156             sp->degr_dumpdate = NULL;
2157         }
2158         /*@end@*/
2159
2160         sp->attempted = 0;
2161         sp->act_size = (off_t)0;
2162         sp->holdp = NULL;
2163         sp->activehd = -1;
2164         sp->dumper = NULL;
2165         sp->timestamp = (time_t)0;
2166         sp->destname = NULL;
2167         sp->no_space = 0;
2168
2169         dp->up = (char *) sp;
2170         if(dp->host->features == NULL) {
2171             dp->host->features = am_string_to_feature(features);
2172         }
2173         remove_disk(&waitq, dp);
2174         enqueue_disk(&runq, dp);
2175         flush_size += sp->act_size;
2176         amfree(diskname);
2177     }
2178     printf("driver: flush size " OFF_T_FMT "\n", (OFF_T_FMT_TYPE)flush_size);
2179     amfree(inpline);
2180     if(line == 0)
2181         log_add(L_WARNING, "WARNING: got empty schedule from planner");
2182     if(need_degraded==1) start_degraded_mode(&runq);
2183     start_some_dumps(&runq);
2184 }
2185
2186 static unsigned long
2187 free_kps(
2188     interface_t *ip)
2189 {
2190     unsigned long res;
2191
2192     if (ip == (interface_t *)0) {
2193         interface_t *p;
2194         unsigned long maxusage=0;
2195         unsigned long curusage=0;
2196         for(p = lookup_interface(NULL); p != NULL; p = p->next) {
2197             maxusage += interface_get_maxusage(p);
2198             curusage += p->curusage;
2199         }
2200         res = maxusage - curusage;
2201 #ifndef __lint
2202     } else {
2203         res = interface_get_maxusage(ip) - ip->curusage;
2204 #endif
2205     }
2206
2207     return res;
2208 }
2209
2210 static void
2211 interface_state(
2212     char *time_str)
2213 {
2214     interface_t *ip;
2215
2216     printf("driver: interface-state time %s", time_str);
2217
2218     for(ip = lookup_interface(NULL); ip != NULL; ip = ip->next) {
2219         printf(" if %s: free %lu", ip->name, free_kps(ip));
2220     }
2221     printf("\n");
2222 }
2223
2224 static void
2225 allocate_bandwidth(
2226     interface_t *       ip,
2227     unsigned long       kps)
2228 {
2229     ip->curusage += kps;
2230 }
2231
2232 static void
2233 deallocate_bandwidth(
2234     interface_t *       ip,
2235     unsigned long       kps)
2236 {
2237     assert(kps <= ip->curusage);
2238     ip->curusage -= kps;
2239 }
2240
2241 /* ------------ */
2242 static off_t
2243 free_space(void)
2244 {
2245     holdingdisk_t *hdp;
2246     off_t total_free;
2247     off_t diff;
2248
2249     total_free = (off_t)0;
2250     for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2251         diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2252         if(diff > (off_t)0)
2253             total_free += diff;
2254     }
2255     return total_free;
2256 }
2257
2258 /*
2259  * We return an array of pointers to assignedhd_t. The array contains at
2260  * most one entry per holding disk. The list of pointers is terminated by
2261  * a NULL pointer. Each entry contains a pointer to a holdingdisk and
2262  * how much diskspace to use on that disk. Later on, assign_holdingdisk
2263  * will allocate the given amount of space.
2264  * If there is not enough room on the holdingdisks, NULL is returned.
2265  */
2266
2267 static assignedhd_t **
2268 find_diskspace(
2269     off_t               size,
2270     int *               cur_idle,
2271     assignedhd_t *      pref)
2272 {
2273     assignedhd_t **result = NULL;
2274     holdingdisk_t *minp, *hdp;
2275     int i=0, num_holdingdisks=0; /* are we allowed to use the global thing? */
2276     int j, minj;
2277     char *used;
2278     off_t halloc, dalloc, hfree, dfree;
2279
2280     (void)cur_idle;     /* Quiet unused parameter warning */
2281
2282     if (size < 2*DISK_BLOCK_KB)
2283         size = 2*DISK_BLOCK_KB;
2284     size = am_round(size, (off_t)DISK_BLOCK_KB);
2285
2286 #ifdef HOLD_DEBUG
2287     printf("%s: want " OFF_T_FMT " K\n", debug_prefix_time(": find_diskspace"),
2288            (OFF_T_FMT_TYPE)size);
2289     fflush(stdout);
2290 #endif
2291
2292     for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2293         num_holdingdisks++;
2294     }
2295
2296     used = alloc(SIZEOF(*used) * num_holdingdisks);/*disks used during this run*/
2297     memset( used, 0, (size_t)num_holdingdisks );
2298     result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2299     result[0] = NULL;
2300
2301     while( i < num_holdingdisks && size > (off_t)0 ) {
2302         /* find the holdingdisk with the fewest active dumpers and among
2303          * those the one with the biggest free space
2304          */
2305         minp = NULL; minj = -1;
2306         for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next, j++ ) {
2307             if( pref && pref->disk == hdp && !used[j] &&
2308                 holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)DISK_BLOCK_KB) {
2309                 minp = hdp;
2310                 minj = j;
2311                 break;
2312             }
2313             else if( holdalloc(hdp)->allocated_space <= hdp->disksize - (off_t)(2*DISK_BLOCK_KB) &&
2314                 !used[j] &&
2315                 (!minp ||
2316                  holdalloc(hdp)->allocated_dumpers < holdalloc(minp)->allocated_dumpers ||
2317                  (holdalloc(hdp)->allocated_dumpers == holdalloc(minp)->allocated_dumpers &&
2318                   hdp->disksize-holdalloc(hdp)->allocated_space > minp->disksize-holdalloc(minp)->allocated_space)) ) {
2319                 minp = hdp;
2320                 minj = j;
2321             }
2322         }
2323
2324         pref = NULL;
2325         if( !minp ) { break; } /* all holding disks are full */
2326         used[minj] = 1;
2327
2328         /* hfree = free space on the disk */
2329         hfree = minp->disksize - holdalloc(minp)->allocated_space;
2330
2331         /* dfree = free space for data, remove 1 header for each chunksize */
2332         dfree = hfree - (((hfree-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2333
2334         /* dalloc = space I can allocate for data */
2335         dalloc = ( dfree < size ) ? dfree : size;
2336
2337         /* halloc = space to allocate, including 1 header for each chunksize */
2338         halloc = dalloc + (((dalloc-(off_t)1)/holdingdisk_get_chunksize(minp))+(off_t)1) * (off_t)DISK_BLOCK_KB;
2339
2340 #ifdef HOLD_DEBUG
2341         printf("%s: find diskspace: size " OFF_T_FMT " hf " OFF_T_FMT
2342                " df " OFF_T_FMT " da " OFF_T_FMT " ha " OFF_T_FMT "\n",
2343                debug_prefix_time(": find_diskspace"),
2344                (OFF_T_FMT_TYPE)size,
2345                (OFF_T_FMT_TYPE)hfree,
2346                (OFF_T_FMT_TYPE)dfree,
2347                (OFF_T_FMT_TYPE)dalloc,
2348                (OFF_T_FMT_TYPE)halloc);
2349         fflush(stdout);
2350 #endif
2351         size -= dalloc;
2352         result[i] = alloc(SIZEOF(assignedhd_t));
2353         result[i]->disk = minp;
2354         result[i]->reserved = halloc;
2355         result[i]->used = (off_t)0;
2356         result[i]->destname = NULL;
2357         result[i+1] = NULL;
2358         i++;
2359     } /* while i < num_holdingdisks && size > 0 */
2360     amfree(used);
2361
2362     if(size != (off_t)0) { /* not enough space available */
2363         printf("find diskspace: not enough diskspace. Left with "
2364                OFF_T_FMT " K\n", (OFF_T_FMT_TYPE)size);
2365         fflush(stdout);
2366         free_assignedhd(result);
2367         result = NULL;
2368     }
2369
2370 #ifdef HOLD_DEBUG
2371     for( i = 0; result && result[i]; i++ ) {
2372         printf("%s: find diskspace: selected %s free " OFF_T_FMT " reserved " OFF_T_FMT " dumpers %d\n",
2373                 debug_prefix_time(": find_diskspace"),
2374                 holdingdisk_get_diskdir(result[i]->disk),
2375                 (OFF_T_FMT_TYPE)(result[i]->disk->disksize -
2376                   holdalloc(result[i]->disk)->allocated_space),
2377                 (OFF_T_FMT_TYPE)result[i]->reserved,
2378                 holdalloc(result[i]->disk)->allocated_dumpers);
2379     }
2380     fflush(stdout);
2381 #endif
2382
2383     return result;
2384 }
2385
/*
 * Attach the holding-disk reservations in holdp to diskp's schedule entry.
 *
 * holdp is a NULL-terminated array of assignedhd_t pointers.  The list in
 * sched(diskp)->holdp is replaced by a larger copy and the entries of
 * holdp are moved onto its end.  For every entry the reserved amount is
 * added to the allocated_space of its holding disk, and every moved entry
 * gets a destname of the form
 *     <diskdir>/<hd_driver_timestamp>/<hostname>.<sanitised name>.<level>
 *
 * If diskp already has reservations and the last one is on the same
 * holding disk as holdp[0], holdp[0] is not moved: its reserved amount is
 * added to that existing entry and holdp[0] is released with amfree().
 *
 * Moved entries are set to NULL in holdp; the holdp array itself is left
 * for the caller to release.
 *
 * Returns the index into sched(diskp)->holdp at which the new space
 * starts: 0 if there was no earlier reservation, the earlier entry count
 * if the entries were appended, or that count minus one if holdp[0] was
 * merged into the last earlier entry.
 */
static int
assign_holdingdisk(
    assignedhd_t **     holdp,
    disk_t *            diskp)
{
    int i, j, c, l=0;
    off_t size;
    char *sfn = sanitise_filename(diskp->name);
    char lvl[64];
    assignedhd_t **new_holdp;
    char *qname;

    snprintf( lvl, SIZEOF(lvl), "%d", sched(diskp)->level );

    /* size is the space still missing; below it is only tracked for the
     * debug output */
    size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
                    (off_t)DISK_BLOCK_KB);

    for( c = 0; holdp[c]; c++ )
        (void)c; /* count number of disks */

    /* allocate memory for sched(diskp)->holdp */
    /* j = number of entries already assigned to this disk */
    for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++)
        (void)j;        /* Quiet lint */
    /* room for the old entries, the new ones and the NULL terminator */
    new_holdp = (assignedhd_t **)alloc(SIZEOF(assignedhd_t*)*(j+c+1));
    if (sched(diskp)->holdp) {
        memcpy(new_holdp, sched(diskp)->holdp, j * SIZEOF(*new_holdp));
        amfree(sched(diskp)->holdp);
    }
    sched(diskp)->holdp = new_holdp;
    new_holdp = NULL;

    i = 0;
    if( j > 0 ) { /* This is a request for additional diskspace. See if we can
                   * merge assignedhd_t's */
        l=j;    /* without a merge, the new space starts at the old count */
        if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
            sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
            holdalloc(holdp[0]->disk)->allocated_space += holdp[0]->reserved;
            size = (holdp[0]->reserved>size) ? (off_t)0 : size-holdp[0]->reserved;
            qname = quote_string(diskp->name);
#ifdef HOLD_DEBUG
            printf("%s: merging holding disk %s to disk %s:%s, add " OFF_T_FMT " for reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
                   debug_prefix_time(": assign_holdingdisk"),
                   holdingdisk_get_diskdir(sched(diskp)->holdp[j-1]->disk),
                   diskp->host->hostname, qname,
                   (OFF_T_FMT_TYPE)holdp[0]->reserved,
                   (OFF_T_FMT_TYPE)sched(diskp)->holdp[j-1]->reserved,
                   (OFF_T_FMT_TYPE)size);
            fflush(stdout);
#endif
            i++;        /* holdp[0] has been dealt with; skip it below */
            amfree(qname);
            amfree(holdp[0]);
            l=j-1;      /* the new space continues in the last old entry */
        }
    }

    /* copy assignedhd_s to sched(diskp), adjust allocated_space */
    for( ; holdp[i]; i++ ) {
        holdp[i]->destname = newvstralloc( holdp[i]->destname,
                                           holdingdisk_get_diskdir(holdp[i]->disk), "/",
                                           hd_driver_timestamp, "/",
                                           diskp->host->hostname, ".",
                                           sfn, ".",
                                           lvl, NULL );
        sched(diskp)->holdp[j++] = holdp[i];
        holdalloc(holdp[i]->disk)->allocated_space += holdp[i]->reserved;
        size = (holdp[i]->reserved > size) ? (off_t)0 :
                  (size - holdp[i]->reserved);
        qname = quote_string(diskp->name);
#ifdef HOLD_DEBUG
        printf("%s: %d assigning holding disk %s to disk %s:%s, reserved " OFF_T_FMT ", left " OFF_T_FMT "\n",
                debug_prefix_time(": assign_holdingdisk"),
                i, holdingdisk_get_diskdir(holdp[i]->disk), diskp->host->hostname, qname,
                (OFF_T_FMT_TYPE)holdp[i]->reserved,
                (OFF_T_FMT_TYPE)size);
        fflush(stdout);
#endif
        amfree(qname);
        holdp[i] = NULL; /* so it doesn't get free()d... */
    }
    /* terminate the combined list */
    sched(diskp)->holdp[j] = NULL;
    amfree(sfn);

    return l;
}
2472
2473 static void
2474 adjust_diskspace(
2475     disk_t *    diskp,
2476     cmd_t       cmd)
2477 {
2478     assignedhd_t **holdp;
2479     off_t total = (off_t)0;
2480     off_t diff;
2481     int i;
2482     char *qname, *hqname, *qdest;
2483
2484     (void)cmd;  /* Quiet unused parameter warning */
2485
2486     qname = quote_string(diskp->name);
2487     qdest = quote_string(sched(diskp)->destname);
2488 #ifdef HOLD_DEBUG
2489     printf("%s: %s:%s %s\n",
2490            debug_prefix_time(": adjust_diskspace"),
2491            diskp->host->hostname, qname, qdest);
2492     fflush(stdout);
2493 #endif
2494
2495     holdp = sched(diskp)->holdp;
2496
2497     assert(holdp != NULL);
2498
2499     for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
2500         diff = holdp[i]->used - holdp[i]->reserved;
2501         total += holdp[i]->used;
2502         holdalloc(holdp[i]->disk)->allocated_space += diff;
2503         hqname = quote_string(holdp[i]->disk->name);
2504 #ifdef HOLD_DEBUG
2505         printf("%s: hdisk %s done, reserved " OFF_T_FMT " used " OFF_T_FMT " diff " OFF_T_FMT " alloc " OFF_T_FMT " dumpers %d\n",
2506                 debug_prefix_time(": adjust_diskspace"),
2507                 holdp[i]->disk->name,
2508                 (OFF_T_FMT_TYPE)holdp[i]->reserved,
2509                 (OFF_T_FMT_TYPE)holdp[i]->used,
2510                 (OFF_T_FMT_TYPE)diff,
2511                 (OFF_T_FMT_TYPE)holdalloc(holdp[i]->disk)->allocated_space,
2512                 holdalloc(holdp[i]->disk)->allocated_dumpers );
2513         fflush(stdout);
2514 #endif
2515         holdp[i]->reserved += diff;
2516         amfree(hqname);
2517     }
2518
2519     sched(diskp)->act_size = total;
2520
2521 #ifdef HOLD_DEBUG
2522     printf("%s: after: disk %s:%s used " OFF_T_FMT "\n",
2523            debug_prefix_time(": adjust_diskspace"),
2524            diskp->host->hostname, qname,
2525            (OFF_T_FMT_TYPE)sched(diskp)->act_size);
2526     fflush(stdout);
2527 #endif
2528     amfree(qdest);
2529     amfree(qname);
2530 }
2531
2532 static void
2533 delete_diskspace(
2534     disk_t *diskp)
2535 {
2536     assignedhd_t **holdp;
2537     int i;
2538
2539     holdp = sched(diskp)->holdp;
2540
2541     assert(holdp != NULL);
2542
2543     for( i = 0; holdp[i]; i++ ) { /* for each disk */
2544         /* find all files of this dump on that disk, and subtract their
2545          * reserved sizes from the disk's allocated space
2546          */
2547         holdalloc(holdp[i]->disk)->allocated_space -= holdp[i]->used;
2548     }
2549
2550     unlink_holding_files(holdp[0]->destname);   /* no need for the entire list,
2551                                                  * because unlink_holding_files
2552                                                  * will walk through all files
2553                                                  * using cont_filename */
2554     free_assignedhd(sched(diskp)->holdp);
2555     sched(diskp)->holdp = NULL;
2556     sched(diskp)->act_size = (off_t)0;
2557 }
2558
2559 static assignedhd_t **
2560 build_diskspace(
2561     char *      destname)
2562 {
2563     int i, j;
2564     int fd;
2565     ssize_t buflen;
2566     char buffer[DISK_BLOCK_BYTES];
2567     dumpfile_t file;
2568     assignedhd_t **result;
2569     holdingdisk_t *hdp;
2570     off_t *used;
2571     int num_holdingdisks=0;
2572     char dirname[1000], *ch;
2573     struct stat finfo;
2574     char *filename = destname;
2575
2576     memset(buffer, 0, sizeof(buffer));
2577     for(hdp = getconf_holdingdisks(); hdp != NULL; hdp = hdp->next) {
2578         num_holdingdisks++;
2579     }
2580     used = alloc(SIZEOF(off_t) * num_holdingdisks);
2581     for(i=0;i<num_holdingdisks;i++)
2582         used[i] = (off_t)0;
2583     result = alloc(SIZEOF(assignedhd_t *) * (num_holdingdisks + 1));
2584     result[0] = NULL;
2585     while(filename != NULL && filename[0] != '\0') {
2586         strncpy(dirname, filename, 999);
2587         dirname[999]='\0';
2588         ch = strrchr(dirname,'/');
2589         *ch = '\0';
2590         ch = strrchr(dirname,'/');
2591         *ch = '\0';
2592
2593         for(j = 0, hdp = getconf_holdingdisks(); hdp != NULL;
2594                                                  hdp = hdp->next, j++ ) {
2595             if(strcmp(dirname, holdingdisk_get_diskdir(hdp))==0) {
2596                 break;
2597             }
2598         }
2599
2600         if(stat(filename, &finfo) == -1) {
2601             fprintf(stderr, "stat %s: %s\n", filename, strerror(errno));
2602             finfo.st_size = (off_t)0;
2603         }
2604         used[j] += ((off_t)finfo.st_size+(off_t)1023)/(off_t)1024;
2605         if((fd = open(filename,O_RDONLY)) == -1) {
2606             fprintf(stderr,"build_diskspace: open of %s failed: %s\n",
2607                     filename, strerror(errno));
2608             return NULL;
2609         }
2610         if ((buflen = fullread(fd, buffer, SIZEOF(buffer))) > 0) {;
2611                 parse_file_header(buffer, &file, (size_t)buflen);
2612         }
2613         close(fd);
2614         filename = file.cont_filename;
2615     }
2616
2617     for(j = 0, i=0, hdp = getconf_holdingdisks(); hdp != NULL;
2618                                                   hdp = hdp->next, j++ ) {
2619         if(used[j] != (off_t)0) {
2620             result[i] = alloc(SIZEOF(assignedhd_t));
2621             result[i]->disk = hdp;
2622             result[i]->reserved = used[j];
2623             result[i]->used = used[j];
2624             result[i]->destname = stralloc(destname);
2625             result[i+1] = NULL;
2626             i++;
2627         }
2628     }
2629
2630     amfree(used);
2631     return result;
2632 }
2633
2634 static void
2635 holdingdisk_state(
2636     char *      time_str)
2637 {
2638     holdingdisk_t *hdp;
2639     int dsk;
2640     off_t diff;
2641
2642     printf("driver: hdisk-state time %s", time_str);
2643
2644     for(hdp = getconf_holdingdisks(), dsk = 0; hdp != NULL; hdp = hdp->next, dsk++) {
2645         diff = hdp->disksize - holdalloc(hdp)->allocated_space;
2646         printf(" hdisk %d: free " OFF_T_FMT " dumpers %d", dsk,
2647                (OFF_T_FMT_TYPE)diff, holdalloc(hdp)->allocated_dumpers);
2648     }
2649     printf("\n");
2650 }
2651
2652 static void
2653 update_failed_dump_to_tape(
2654     disk_t *    dp)
2655 {
2656 /* JLM
2657  * should simply set no_bump
2658  */
2659
2660     time_t save_timestamp = sched(dp)->timestamp;
2661     /* setting timestamp to 0 removes the current level from the
2662      * database, so that we ensure that it will not be bumped to the
2663      * next level on the next run.  If we didn't do this, dumpdates or
2664      * gnutar-lists might have been updated already, and a bumped
2665      * incremental might be created.  */
2666     sched(dp)->timestamp = 0;
2667     update_info_dumper(dp, (off_t)-1, (off_t)-1, (time_t)-1);
2668     sched(dp)->timestamp = save_timestamp;
2669 }
2670
2671 /* ------------------- */
2672 static int
2673 dump_to_tape(
2674     disk_t *    dp)
2675 {
2676     dumper_t *dumper;
2677     int failed = 0;
2678     off_t filenum;
2679     off_t origsize = (off_t)0;
2680     off_t dumpsize = (off_t)0;
2681     time_t dumptime = (time_t)0;
2682     double tapetime = 0.0;
2683     cmd_t cmd;
2684     int result_argc, rc;
2685     char *result_argv[MAX_ARGS+1];
2686     int dumper_tryagain = 0;
2687     char *qname;
2688
2689     qname = quote_string(dp->name);
2690     printf("driver: dumping %s:%s directly to tape\n",
2691            dp->host->hostname, qname);
2692     fflush(stdout);
2693
2694     /* pick a dumper and fail if there are no idle dumpers */
2695
2696     dumper = idle_dumper();
2697     if (!dumper) {
2698         printf("driver: no idle dumpers for %s:%s.\n", 
2699                 dp->host->hostname, qname);
2700         fflush(stdout);
2701         log_add(L_WARNING, "no idle dumpers for %s:%s.\n",
2702                 dp->host->hostname, qname);
2703         amfree(qname);
2704         return 2;       /* fatal problem */
2705     }
2706
2707     /* tell the taper to read from a port number of its choice */
2708
2709     taper_cmd(PORT_WRITE, dp, NULL, sched(dp)->level, sched(dp)->datestamp);
2710     cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2711     if(cmd != PORT) {
2712         printf("driver: did not get PORT from taper for %s:%s\n",
2713                 dp->host->hostname, qname);
2714         fflush(stdout);
2715         amfree(qname);
2716         return 2;       /* fatal problem */
2717     }
2718     /* copy port number */
2719     dumper->output_port = atoi(result_argv[2]);
2720
2721     /* tell the dumper to dump to a port */
2722
2723     dumper_cmd(dumper, PORT_DUMP, dp);
2724     dp->host->start_t = time(NULL) + 15;
2725
2726     /* update statistics & print state */
2727
2728     taper_busy = dumper->busy = 1;
2729     dp->host->inprogress += 1;
2730     dp->inprogress = 1;
2731     sched(dp)->timestamp = time((time_t *)0);
2732     allocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2733     idle_reason = NOT_IDLE;
2734
2735     short_dump_state();
2736
2737     /* wait for result from dumper */
2738
2739     cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2740
2741     switch(cmd) {
2742     case BOGUS:
2743         /* either eof or garbage from dumper */
2744         log_add(L_WARNING, "%s pid %ld is messed up, ignoring it.\n",
2745                 dumper->name, (long)dumper->pid);
2746         dumper->down = 1;       /* mark it down so it isn't used again */
2747         failed = 1;     /* dump failed, must still finish up with taper */
2748         break;
2749
2750     case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
2751         /* everything went fine */
2752         origsize = (off_t)atof(result_argv[3]);
2753         /*dumpsize = (off_t)atof(result_argv[4]);*/
2754         dumptime = (time_t)atof(result_argv[5]);
2755         break;
2756
2757     case NO_ROOM: /* NO-ROOM <handle> */
2758         dumper_cmd(dumper, ABORT, dp);
2759         cmd = getresult(dumper->fd, 1, &result_argc, result_argv, MAX_ARGS+1);
2760         assert(cmd == ABORT_FINISHED);
2761
2762     case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2763     default:
2764         /* dump failed, but we must still finish up with taper */
2765         /* problem with dump, possibly nonfatal, retry one time */
2766         sched(dp)->attempted++;
2767         failed = sched(dp)->attempted;
2768         dumper_tryagain = 1;
2769         break;
2770         
2771     case FAILED: /* FAILED <handle> <errstr> */
2772         /* dump failed, but we must still finish up with taper */
2773         failed = 2;     /* fatal problem with dump */
2774         break;
2775     }
2776
2777     /*
2778      * Note that at this point, even if the dump above failed, it may
2779      * not be a fatal failure if taper below says we can try again.
2780      * E.g. a dumper failure above may actually be the result of a
2781      * tape overflow, which in turn causes dump to see "broken pipe",
2782      * "no space on device", etc., since taper closed the port first.
2783      */
2784
2785     continue_port_dump:
2786
2787     cmd = getresult(taper, 1, &result_argc, result_argv, MAX_ARGS+1);
2788
2789     switch(cmd) {
2790     case PARTIAL:
2791     case DONE: /* DONE <handle> <label> <tape file> <err mess> */
2792         if(result_argc != 5) {
2793             error("error [dump to tape DONE result_argc != 5: %d]", result_argc);
2794             /*NOTREACHED*/
2795         }
2796
2797         if(failed == 1) goto tryagain;  /* dump didn't work */
2798         else if(failed == 2) goto failed_dumper;
2799
2800         free_serial(result_argv[2]);
2801
2802         if (*result_argv[5] == '"') {
2803             /* String was quoted */
2804             rc = sscanf(result_argv[5],"\"[sec %lf kb " OFF_T_FMT " ",
2805                         &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
2806         } else {
2807             /* String was not quoted */
2808             rc = sscanf(result_argv[5],"[sec %lf kb " OFF_T_FMT " ",
2809                         &tapetime, (OFF_T_FMT_TYPE *)&dumpsize);
2810         }
2811         if (rc < 2) {
2812             error("error [malformed result: %d items matched in '%s']",
2813                   rc, result_argv[5]);
2814             /*NOTREACHED*/
2815         }
2816
2817         if(cmd == DONE) {
2818             /* every thing went fine */
2819             update_info_dumper(dp, origsize, dumpsize, dumptime);
2820             filenum = OFF_T_ATOI(result_argv[4]);
2821             update_info_taper(dp, result_argv[3], filenum, sched(dp)->level);
2822             /* note that update_info_dumper() must be run before
2823                update_info_taper(), since update_info_dumper overwrites
2824                tape information.  */
2825         }
2826
2827         break;
2828
2829     case TRYAGAIN: /* TRY-AGAIN <handle> <err mess> */
2830         tape_left = tape_length;
2831         current_tape++;
2832         if(dumper_tryagain == 0) {
2833             sched(dp)->attempted++;
2834             if(sched(dp)->attempted > failed)
2835                 failed = sched(dp)->attempted;
2836         }
2837     tryagain:
2838         if(failed <= 1)
2839             headqueue_disk(&runq, dp);
2840     failed_dumper:
2841         update_failed_dump_to_tape(dp);
2842         free_serial(result_argv[2]);
2843         break;
2844
2845     case SPLIT_CONTINUE:  /* SPLIT_CONTINUE <handle> <new_label> */
2846         if (result_argc != 3) {
2847             error("error [taper SPLIT_CONTINUE result_argc != 3: %d]", result_argc);
2848             /*NOTREACHED*/
2849         }
2850         fprintf(stderr, "driver: Got SPLIT_CONTINUE %s %s\n",
2851                 result_argv[2], result_argv[3]);
2852         goto continue_port_dump;
2853
2854     case SPLIT_NEEDNEXT:
2855         fprintf(stderr, "driver: Got SPLIT_NEEDNEXT %s %s\n", result_argv[2], result_argv[3]);
2856
2857         goto continue_port_dump;
2858
2859     case TAPE_ERROR: /* TAPE-ERROR <handle> <err mess> */
2860     case BOGUS:
2861     default:
2862         update_failed_dump_to_tape(dp);
2863         free_serial(result_argv[2]);
2864         failed = 2;     /* fatal problem */
2865         start_degraded_mode(&runq);
2866         break;
2867     }
2868
2869     /* reset statistics & return */
2870
2871     taper_busy = dumper->busy = 0;
2872     dp->host->inprogress -= 1;
2873     dp->inprogress = 0;
2874     deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2875     amfree(qname);
2876
2877     return failed;
2878 }
2879
2880 static int
2881 queue_length(
2882     disklist_t  q)
2883 {
2884     disk_t *p;
2885     int len;
2886
2887     for(len = 0, p = q.head; p != NULL; len++, p = p->next)
2888         (void)len;      /* Quiet lint */
2889     return len;
2890 }
2891
2892 static void
2893 short_dump_state(void)
2894 {
2895     int i, nidle;
2896     char *wall_time;
2897
2898     wall_time = walltime_str(curclock());
2899
2900     printf("driver: state time %s ", wall_time);
2901     printf("free kps: %lu space: " OFF_T_FMT " taper: ",
2902            free_kps((interface_t *)0),
2903            (OFF_T_FMT_TYPE)free_space());
2904     if(degraded_mode) printf("DOWN");
2905     else if(!taper_busy) printf("idle");
2906     else printf("writing");
2907     nidle = 0;
2908     for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
2909     printf(" idle-dumpers: %d", nidle);
2910     printf(" qlen tapeq: %d", queue_length(tapeq));
2911     printf(" runq: %d", queue_length(runq));
2912     printf(" roomq: %d", queue_length(roomq));
2913     printf(" wakeup: %d", (int)sleep_time);
2914     printf(" driver-idle: %s\n", idle_strings[idle_reason]);
2915     interface_state(wall_time);
2916     holdingdisk_state(wall_time);
2917     fflush(stdout);
2918 }
2919
#if 0
/*
 * Print a verbose, multi-line description of the driver's state to
 * stdout: free bandwidth and space, taper status, what every dumper is
 * doing, and the first entries of the tape, room and run queues.
 *
 * This function is currently compiled out.
 *
 * str: free-form text appended to the header line.
 */
static void
dump_state(
    const char *str)
{
    int i;
    disk_t *dp;
    char *qname;

    printf("================\n");
    printf("driver state at time %s: %s\n", walltime_str(curclock()), str);
    printf("free kps: %lu, space: " OFF_T_FMT "\n",
           free_kps((interface_t *)0),
           (OFF_T_FMT_TYPE)free_space());
    if(degraded_mode) printf("taper: DOWN\n");
    else if(!taper_busy) printf("taper: idle\n");
    else printf("taper: writing %s:%s.%d est size " OFF_T_FMT "\n",
                taper_disk->host->hostname, taper_disk->name,
                sched(taper_disk)->level,
                (OFF_T_FMT_TYPE)sched(taper_disk)->est_size);
    for(i = 0; i < inparallel; i++) {
        dp = dmptable[i].dp;
        if(!dmptable[i].busy) {
            printf("%s: idle\n", dmptable[i].name);
        } else {
            /*
             * Only a busy dumper has a disk to describe; the whole
             * report, not just the quoting, belongs in this branch.
             */
            /* NOTE(review): est_kps is printed with %d and est_time with
             * %lu; confirm these match the field types in sched_t. */
            qname = quote_string(dp->name);
            printf("%s: dumping %s:%s.%d est kps %d size " OFF_T_FMT " time %lu\n",
                  dmptable[i].name, dp->host->hostname, qname, sched(dp)->level,
                  sched(dp)->est_kps, (OFF_T_FMT_TYPE)sched(dp)->est_size,
                  sched(dp)->est_time);
            amfree(qname);
        }
    }
    dump_queue("TAPE", tapeq, 5, stdout);
    dump_queue("ROOM", roomq, 5, stdout);
    dump_queue("RUN ", runq, 5, stdout);
    printf("================\n");
    fflush(stdout);
}
#endif