2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1999 University of Maryland at College Park
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of U.M. not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. U.M. makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Authors: the Amanda Development Team. Its members are listed in a
24 * file named AUTHORS, in the root directory of this distribution.
27 * $Id: protocol.c,v 1.45 2006/05/25 17:07:31 martinea Exp $
29 * implements amanda protocol
38 #define proto_debug(i,x) do { \
39 if ((i) <= debug_protocol) { \
45 * Valid actions that can be passed to the state machine
60 * The current state type. States are represented as function
64 typedef p_action_t (*pstate_t)(struct proto *, p_action_t, pkt_t *);
67 * This is a request structure that is wrapped around a packet while it
68 * is being passed through amanda. It holds the timeouts, state, and handles
71 typedef struct proto {
72 pstate_t state; /* current state of the request */
73 char *hostname; /* remote host */
74 const security_driver_t *security_driver; /* for connect retries */
75 security_handle_t *security_handle; /* network stream for this req */
76 time_t timeout; /* seconds for this timeout */
77 time_t repwait; /* seconds to wait for reply */
78 time_t origtime; /* orig start time of this request */
79 time_t curtime; /* time when this attempt started */
80 int connecttries; /* times we'll retry a connect */
81 int resettries; /* times we'll resend a REQ */
82 int reqtries; /* times we'll wait for an ACK */
83 pkt_t req; /* the actual wire request */
84 protocol_sendreq_callback continuation; /* call when req dies/finishes */
85 void *datap; /* opaque cookie passed to above */
86 char *(*conf_fn)(char *, void *); /* configuration function */
89 #define CONNECT_WAIT 5 /* secs between connect attempts */
90 #define ACK_WAIT 10 /* time (secs) to wait for ACK - keep short */
91 #define RESET_TRIES 2 /* num restarts (reboot/crash) */
92 #define CURTIME (time(0) - proto_init_time) /* time relative to start */
94 /* if no reply in an hour, just forget it */
95 #define DROP_DEAD_TIME(t) (CURTIME - (t) > (60 * 60))
97 /* get the size of an array */
98 #define ASIZE(arr) (int)(sizeof(arr) / sizeof((arr)[0]))
101 * Initialization time
103 static time_t proto_init_time;
105 /* local functions */
107 static const char *action2str(p_action_t);
108 static const char *pstate2str(pstate_t);
110 static void connect_callback(void *, security_handle_t *, security_status_t);
111 static void connect_wait_callback(void *);
112 static void recvpkt_callback(void *, pkt_t *, security_status_t);
114 static p_action_t s_sendreq(proto_t *, p_action_t, pkt_t *);
115 static p_action_t s_ackwait(proto_t *, p_action_t, pkt_t *);
116 static p_action_t s_repwait(proto_t *, p_action_t, pkt_t *);
117 static void state_machine(proto_t *, p_action_t, pkt_t *);
120 * -------------------
121 * Interface functions
125 * Initialize globals.
131 proto_init_time = time(NULL);
135 * Generate a request packet, and submit it to the state machine
140 const char * hostname,
141 const security_driver_t * security_driver,
142 char * (*conf_fn)(char *, void *),
145 protocol_sendreq_callback continuation,
150 p = alloc(SIZEOF(proto_t));
151 p->state = s_sendreq;
152 p->hostname = stralloc(hostname);
153 p->security_driver = security_driver;
154 /* p->security_handle set in connect_callback */
155 p->repwait = repwait;
156 p->origtime = CURTIME;
157 /* p->curtime set in the sendreq state */
158 p->connecttries = getconf_int(CNF_CONNECT_TRIES);
159 p->resettries = RESET_TRIES;
160 p->reqtries = getconf_int(CNF_REQ_TRIES);
161 p->conf_fn = conf_fn;
162 pkt_init(&p->req, P_REQ, req);
165 * These are here for the caller
166 * We call the continuation function after processing is complete.
167 * We pass the datap on through untouched. It is here so the caller
168 * has a way to keep state with each request.
170 p->continuation = continuation;
173 proto_debug(1, ("%s: security_connect: host %s -> p %p\n",
174 debug_prefix_time(": protocol"), hostname, p));
176 security_connect(p->security_driver, p->hostname, conf_fn, connect_callback,
181 * This is a callback for security_connect. After the security layer
182 * has initiated a connection to the given host, this will be called
183 * with a security_handle_t.
185 * On error, the security_status_t arg will reflect errors which can
186 * be had via security_geterror on the handle.
191 security_handle_t * security_handle,
192 security_status_t status)
197 p->security_handle = security_handle;
199 proto_debug(1, ("%s: connect_callback: p %p\n",
200 debug_prefix_time(": protocol"), p));
204 state_machine(p, PA_START, NULL);
208 security_seterror(p->security_handle, "timeout during connect");
213 * For timeouts or errors, retry a few times, waiting CONNECT_WAIT
214 * seconds between each attempt. If they all fail, just return
215 * an error back to the caller.
217 if (--p->connecttries == 0) {
218 state_machine(p, PA_ABORT, NULL);
220 proto_debug(1, ("%s: connect_callback: p %p: retrying %s\n",
221 debug_prefix_time(": protocol"), p, p->hostname));
222 security_close(p->security_handle);
223 /* XXX overload p->security_handle to hold the event handle */
225 (security_handle_t *)event_register(CONNECT_WAIT, EV_TIME,
226 connect_wait_callback, p);
237 * This gets called when a host has been put on a wait queue because
238 * initial connection attempts failed.
241 connect_wait_callback(
246 event_release((event_handle_t *)p->security_handle);
247 security_connect(p->security_driver, p->hostname, p->conf_fn,
248 connect_callback, p, p->datap);
253 * Does a one pass protocol sweep. Handles any incoming packets that
254 * are waiting to be processed, and then deals with any pending
255 * requests that have timed out.
257 * Callers should periodically call this after they have submitted
258 * requests if they plan on doing a lot of work.
264 /* arg == 1 means don't block */
270 * Does an infinite pass protocol sweep. This doesn't return until all
271 * requests have been satisfied or have timed out.
273 * Callers should call this after they have finished submitting requests
274 * and are just waiting for all of the answers to come back.
280 /* arg == 0 means block forever until no more events are left */
291 * The guts of the protocol. This handles the many paths a request can
292 * make, including retrying the request and acknowledgements, and dealing
293 * with timeouts and successful replies.
302 p_action_t retaction;
304 proto_debug(1, ("protocol: state_machine: initial: p %p action %s pkt %p\n",
305 p, action2str(action), (void *)NULL));
308 assert(action == PA_RCVDATA || pkt == NULL);
309 assert(p->state != NULL);
312 proto_debug(1, ("%s: state_machine: p %p state %s action %s\n",
313 debug_prefix_time(": protocol"),
314 p, pstate2str(p->state), action2str(action)));
316 proto_debug(1, ("%s: pkt: %s (t %d) orig REQ (t %d cur %d)\n",
317 debug_prefix_time(": protocol"),
318 pkt_type2str(pkt->type), (int)CURTIME,
319 (int)p->origtime, (int)p->curtime));
320 proto_debug(1, ("%s: pkt contents:\n-----\n%s-----\n",
321 debug_prefix_time(": protocol"), pkt->body));
325 * p->state is a function pointer to the current state a request
328 * We keep track of the last state we were in so we can make
329 * sure states which return PA_CONTINUE really have transitioned
330 * the request to a new state.
334 if (action == PA_ABORT)
336 * If the passed action indicates a terminal error, then we
337 * need to move to abort right away.
339 retaction = PA_ABORT;
342 * Else we run the state and perform the action it
345 retaction = (*curstate)(p, action, pkt);
347 proto_debug(1, ("%s: state_machine: p %p state %s returned %s\n",
348 debug_prefix_time(": protocol"),
349 p, pstate2str(p->state), action2str(retaction)));
352 * The state function is expected to return one of the following
358 * Request is still waiting for more data off of the network.
359 * Setup to receive another pkt, and wait for the recv event
363 (*p->continuation)(p->datap, pkt, p->security_handle);
367 proto_debug(1, ("%s: state_machine: p %p state %s: timeout %d\n",
368 debug_prefix_time(": protocol"),
369 p, pstate2str(p->state), (int)p->timeout));
371 * Get the security layer to register a receive event for this
372 * security handle on our behalf. Have it timeout in p->timeout
375 security_recvpkt(p->security_handle, recvpkt_callback, p,
381 * Request has moved to another state. Loop and run it again.
384 assert(p->state != curstate);
385 proto_debug(1, ("%s: state_machine: p %p: moved from %s to %s\n",
386 debug_prefix_time(": protocol"),
387 p, pstate2str(curstate),
388 pstate2str(p->state)));
392 * Request has failed in some way locally. The security_handle will
393 * contain an appropriate error message via security_geterror(). Set
394 * pkt to NULL to indicate failure to the callback, and then
395 * fall through to the common finish code.
397 * Note that remote failures finish via PA_FINISH, because they did
398 * complete successfully locally.
405 * Request has completed successfully.
406 * Free up resources the request has used, call the continuation
407 * function specified by the caller and quit.
410 (*p->continuation)(p->datap, pkt, p->security_handle);
411 security_close(p->security_handle);
419 break; /* in case asserts are turned off */
427 * The request send state. Here, the packet is actually transmitted
428 * across the network. After setting up timeouts, the request
429 * moves to the acknowledgement wait state. We return from the state
430 * machine at this point, and let the request be received from the network.
440 (void)action; /* Quiet unused parameter warning */
441 (void)pkt; /* Quiet unused parameter warning */
443 if (security_sendpkt(p->security_handle, &p->req) < 0) {
444 /* XXX should retry */
445 security_seterror(p->security_handle, "error sending REQ: %s",
446 security_geterror(p->security_handle));
451 * Remember when this attempt at the request was sent
453 p->curtime = CURTIME;
456 * Move to the ackwait state
458 p->state = s_ackwait;
459 p->timeout = ACK_WAIT;
464 * The acknowledge wait state. We can enter here two ways:
466 * - the caller has received a packet, located the request for
467 * that packet, and called us with an action of PA_RCVDATA.
469 * - the caller has determined that a request has timed out,
470 * and has called us with PA_TIMEOUT.
472 * Here we process the acknowledgment, which usually means that
473 * the client has agreed to our request and is working on it.
474 * It will later send a reply when finished.
486 * The timeout case. If our retry count has gone to zero
487 * fail this request. Otherwise, move to the send state
488 * to retry the request.
490 if (action == PA_TIMEOUT) {
493 if (--p->reqtries == 0) {
494 security_seterror(p->security_handle, "timeout waiting for ACK");
498 p->state = s_sendreq;
499 return (PA_CONTINUE);
502 assert(action == PA_RCVDATA);
506 * The packet-received state. Determine what kind of
507 * packet we received, and act based on the reply type.
512 * Received an ACK. Everything's good. The client is
513 * now working on the request. We queue up again and
514 * wait for the reply.
517 p->state = s_repwait;
518 p->timeout = p->repwait;
522 * Received a NAK. The request failed, so free up the
523 * resources associated with it and return.
525 * This should NOT return PA_ABORT because it is not a local failure.
531 * The client skipped the ACK, and replied right away.
532 * Move to the reply state to handle it.
536 p->state = s_repwait;
537 return (PA_CONTINUE);
540 * Unexpected packet. Requeue this request and hope
541 * we get what we want later.
549 * The reply wait state. We enter here much like we do with s_ackwait.
560 * Timeout waiting for a reply.
562 if (action == PA_TIMEOUT) {
566 * If we've blown our timeout limit, free up this packet and
569 if (p->resettries == 0 || DROP_DEAD_TIME(p->origtime)) {
570 security_seterror(p->security_handle, "timeout waiting for REP");
575 * We still have some tries left. Resend the request.
578 p->state = s_sendreq;
579 p->reqtries = getconf_int(CNF_REQ_TRIES);
580 return (PA_CONTINUE);
583 assert(action == PA_RCVDATA);
586 * We've received some data. If we didn't get a reply,
587 * requeue the packet and retry. Otherwise, acknowledge
588 * the reply, cleanup this packet, and return.
590 if (pkt->type != P_REP && pkt->type != P_PREP)
593 if(pkt->type == P_REP) {
594 pkt_init_empty(&ack, P_ACK);
595 if (security_sendpkt(p->security_handle, &ack) < 0) {
596 /* XXX should retry */
598 security_seterror(p->security_handle, "error sending ACK: %s",
599 security_geterror(p->security_handle));
605 else if(pkt->type == P_PREP) {
606 p->timeout = p->repwait - CURTIME + p->curtime + 1;
607 return (PA_CONTPEND);
610 /* should never get here, shut up compiler warning */
615 * event callback that receives a packet
621 security_status_t status)
629 state_machine(p, PA_RCVDATA, pkt);
632 state_machine(p, PA_TIMEOUT, NULL);
635 state_machine(p, PA_ABORT, NULL);
649 * Convert a pstate_t into a printable form.
655 static const struct {
659 #define X(s) { s, stringize(s) }
667 for (i = 0; i < ASIZE(pstates); i++)
668 if (pstate == pstates[i].type)
669 return (pstates[i].name);
670 return ("BOGUS PSTATE");
674 * Convert a p_action_t into a printable form
680 static const struct {
684 #define X(s) { s, stringize(s) }
698 for (i = 0; i < ASIZE(actions); i++)
699 if (action == actions[i].type)
700 return (actions[i].name);
701 return ("BOGUS ACTION");