10 #include <sys/ioctl.h>
16 #include "output-rait.h"
17 #include "output-tape.h"
20 #define amfree(x) do { \
22 int save_errno = errno; \
28 #define tape_open open
29 #define tapefd_read read
30 #define tapefd_write write
31 #define tapefd_close close
32 #define tape_access access
33 #define tape_stat stat
34 #define tapefd_fsf tape_tapefd_fsf
35 #define tapefd_rewind tape_tapefd_rewind
36 #define tapefd_status tape_tapefd_status
37 #define tapefd_unload tape_tapefd_unload
38 #define tapefd_weof tape_tapefd_weof
40 int tapeio_init_devname (char * dev,
44 char *tapeio_next_devname (char * dev_left,
50 ** RAIT -- redundant array of (inexpensive?) tapes
52 ** Author: Marc Mengel <mengel@fnal.gov>
54 ** This package provides for striping input/output across
55 ** multiple tape drives.
59 rait.c..................................................1
60 MAX_RAITS.........................................2
61 rait_table........................................2
62 rait_open(char *dev, int flags, mode_t mode)......2
63 rait_close(int fd)................................3
64 rait_lseek(int fd, long pos, int whence)..........4
65 rait_write(int fd, const char *buf, size_t len) ..5
66 rait_read(int fd, char *buf, size_t len)..........6
67 rait_ioctl(int fd, int op, void *p)...............8
68 rait_access(devname, R_OK|W_OK)...................8
69 rait_stat(devname, struct statbuf*)...............8
70 rait_copy(char *f1, char *f2).....................9
72 rait_tapefd_fsf(rait_tapefd, count)..........10
73 rait_tapefd_rewind(rait_tapefd)..............10
74 rait_tapefd_resetofs(rait_tapefd)............10
75 rait_tapefd_unload(rait_tapefd)..............10
76 rait_tapefd_status(rait_tapefd, stat)........10
77 rait_tapefd_weof(rait_tapefd, count).........10
79 rait.h.................................................1
80 typedef RAIT......................................1
81 ifdef RAIT_REDIRECT...............................1
82 open.........................................1
83 close........................................1
84 ioctl........................................1
85 read.........................................1
86 write........................................1
92 ** rait_open takes a string like:
93 ** "/dev/rmt/tps0d{3,5,7,19}nrnsv"
95 ** "/dev/rmt/tps0d3nrnsv"
96 ** "/dev/rmt/tps0d5nrnsv"
97 ** "/dev/rmt/tps0d7nrnsv"
98 ** "/dev/rmt/tps0d19nrnsv"
101 ** If it has no curly brace, we treat it as a plain device,
102 ** and do a normal open, and do normal operations on it.
106 #define rait_debug(p) do { \
107 int save_errno = errno; \
109 if (0!=getenv("RAIT_DEBUG")) { \
112 errno = save_errno; \
115 #define rait_debug(p)
118 static RAIT *rait_table = 0; /* table to keep track of RAITS */
119 static size_t rait_table_count;
123 * amtable_alloc -- (re)allocate enough space for some number of elements.
125 * input: table -- pointer to pointer to table
126 * current -- pointer to current number of elements
127 * elsize -- size of a table element
128 * count -- desired number of elements
129 * bump -- round up factor
130 * output: table -- possibly adjusted to point to new table area
131 * current -- possibly adjusted to new number of elements
146 if (count >= *current) {
147 table_count_new = ((count + bump) / bump) * bump;
148 table_new = alloc(table_count_new * elsize);
150 memcpy(table_new, *table, *current * elsize);
154 memset(((char *)*table) + *current * elsize,
156 (table_count_new - *current) * elsize);
157 *current = table_count_new;
163 * amtable_free -- release a table.
165 * input: table -- pointer to pointer to table
166 * current -- pointer to current number of elements
167 * output: table -- possibly adjusted to point to new table area
168 * current -- possibly adjusted to new number of elements
181 #define rait_table_alloc(fd) amtable_alloc((void **)rait_table_p, \
183 SIZEOF(*rait_table), \
194 int fd; /* the file descriptor number to return */
195 RAIT *res; /* resulting RAIT structure */
196 char *dev_left; /* string before { */
197 char *dev_right; /* string after } */
198 char *dev_next; /* string inside {} */
199 char *dev_real; /* parsed device name */
200 int rait_flag; /* true if RAIT syntax in dev */
203 RAIT **rait_table_p = &rait_table;
206 rait_debug((stderr,"rait_open( %s, %d, %d )\n", dev, flags, mask));
208 rait_flag = (0 != strchr(dev, '{'));
213 ** we have to return a valid file descriptor, so use
214 ** a dummy one to /dev/null
216 fd = open("/dev/null",flags,mask);
220 ** call the normal tape_open function if we are not
223 fd = tape_open(dev,flags,mask);
226 rait_debug((stderr, "rait_open:returning %d: %s\n",
232 if(0 != rait_table_alloc(fd + 1)) {
234 (void)tapefd_close(fd);
236 rait_debug((stderr, "rait_open:returning %d: %s\n",
242 res = &rait_table[fd];
244 memset(res, 0, SIZEOF(*res));
250 /* copy and parse the dev string so we can scribble on it */
253 rait_debug((stderr, "rait_open:returning %d: %s\n",
255 "out of stralloc memory"));
258 if (0 != tapeio_init_devname(dev, &dev_left, &dev_right, &dev_next)) {
259 rait_debug((stderr, "rait_open:returning %d: %s\n",
265 while (0 != (dev_real = tapeio_next_devname(dev_left, dev_right, &dev_next))) {
267 r = amtable_alloc((void **)fds_p,
270 (size_t)res->nfds + 1,
274 (void)rait_close(fd);
279 res->fds[ res->nfds ] = tape_open(dev_real,flags,mask);
280 rait_debug((stderr,"rait_open:opening %s yields %d\n",
281 dev_real, res->fds[res->nfds] ));
282 if ( res->fds[res->nfds] < 0 ) {
284 (void)rait_close(fd);
290 tapefd_set_master_fd(res->fds[res->nfds], fd);
295 /* clean up our copied string */
301 ** set things up to treat this as a normal tape if we ever
302 ** come in here again
307 r = amtable_alloc((void **)fds_p,
310 (size_t)res->nfds + 1,
314 (void)tapefd_close(fd);
315 memset(res, 0, SIZEOF(*res));
319 res->fds[res->nfds] = fd;
324 if (fd >= 0 && res->nfds > 0) {
325 res->readres = alloc(res->nfds * SIZEOF(*res->readres));
326 memset(res->readres, 0, res->nfds * SIZEOF(*res->readres));
329 rait_debug((stderr, "rait_open:returning %d%s%s\n",
331 (fd < 0) ? ": " : "",
332 (fd < 0) ? strerror(errno) : ""));
346 ** find the first { and then the first } that follows it
348 if ( 0 == (*dev_next = strchr(dev, '{'))
349 || 0 == (*dev_right = strchr(*dev_next + 1, '}')) ) {
350 /* we don't have a {} pair */
356 *dev_left = dev; /* before the { */
357 **dev_next = 0; /* zap the { */
359 (*dev_right)++; /* after the } */
374 if (0 != (*dev_next = strchr(next, ','))
375 || 0 != (*dev_next = strchr(next, '}'))){
377 **dev_next = 0; /* zap the terminator */
381 ** we have one string picked out, build it into the buffer
383 len = strlen(dev_left) + strlen(next) + strlen(dev_right) + 1;
384 dev_real = alloc(len);
385 strcpy(dev_real, dev_left); /* safe */
386 strcat(dev_real, next); /* safe */
387 strcat(dev_real, dev_right); /* safe */
394 ** close everything we opened and free our memory.
400 int i; /* index into RAIT drives */
401 int j; /* individual tapefd_close result */
402 int res; /* result from close */
403 RAIT *pr; /* RAIT entry from table */
404 int save_errno = errno;
408 rait_debug((stderr,"rait_close( %d )\n", fd));
410 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
412 rait_debug((stderr, "rait_close:returning %d: %s\n",
418 pr = &rait_table[fd];
419 if (0 == pr->nopen) {
421 rait_debug((stderr, "rait_close:returning %d: %s\n",
427 if (0 == pr->readres && 0 < pr->nfds) {
428 pr->readres = alloc(pr->nfds * SIZEOF(*pr->readres));
429 memset(pr->readres, 0, pr->nfds * SIZEOF(*pr->readres));
434 ** this looks strange, but we start kids who are going to close the
435 ** drives in parallel just after the parent has closed its copy of
436 ** the descriptor (because closing tape devices usually causes slow
437 ** activities like filemark writes, etc.)
439 for( i = 0; i < pr->nfds; i++ ) {
440 if(tapefd_can_fork(pr->fds[i])) {
441 if ((kid = fork()) == 0) {
442 /* we are the child process */
444 j = tapefd_close(pr->fds[i]);
447 /* remember who the child is or that an error happened */
448 pr->readres[i] = (ssize_t)kid;
452 j = tapefd_close(pr->fds[i]);
459 for( i = 0; i < pr->nfds; i++ ) {
460 j = tapefd_close(pr->fds[i]);
465 for( i = 0; i < pr->nfds; i++ ) {
467 if(pr->readres[i] != -1) {
468 waitpid((pid_t)pr->readres[i], &stat, 0);
469 if( WEXITSTATUS(stat) != 0 ) {
470 res = WEXITSTATUS(stat);
477 (void)close(fd); /* close the dummy /dev/null descriptor */
481 amtable_free((void **)fds_p, &pr->fd_count);
483 if (0 != pr->readres) {
486 if (0 != pr->xorbuf) {
491 rait_debug((stderr, "rait_close:returning %d%s%s\n",
493 (res < 0) ? ": " : "",
494 (res < 0) ? strerror(errno) : ""));
501 ** seek out to the nth byte on the RAIT set.
502 ** this is assumed to be evenly divided across all the stripes
510 int i; /* drive number in RAIT */
511 off_t res, /* result of lseeks */
512 total; /* total of results */
513 RAIT *pr; /* RAIT slot in table */
515 rait_debug((stderr, "rait_lseek(%d," OFF_T_FMT ",%d)\n",
516 fd, (OFF_T_FMT_TYPE)pos, whence));
518 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
520 rait_debug((stderr, "rait_lseek:returning %d: %s\n",
526 pr = &rait_table[fd];
527 if (0 == pr->nopen) {
529 rait_debug((stderr, "rait_lseek:returning %d: %s\n",
535 if ((pr->nfds > 1) && ((pos % (off_t)(pr->nfds-1)) != (off_t)0)) {
540 pos = pos / (off_t)pr->nfds;
541 for( i = 0; i < pr->nfds; i++ ) {
542 if ((off_t)0 >= (res = lseek(pr->fds[i], pos, whence))) {
549 rait_debug((stderr, "rait_lseek:returning %ld%s%s\n",
551 (total < 0) ? ": " : "",
552 (total < 0) ? strerror(errno) : ""));
559 ** if we only have one stream, just do a write,
560 ** otherwise compute an xor sum, and do several
569 const char *buf = bufptr;
570 int i; /* drive number */
571 size_t j; /* byte offset */
572 RAIT *pr; /* RAIT structure for this RAIT */
575 int data_fds; /* number of data stream file descriptors */
577 rait_debug((stderr, "rait_write(%d,%lx,%d)\n",fd,(unsigned long)buf,len));
579 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
581 rait_debug((stderr, "rait_write:returning %d: %s\n",
587 pr = &rait_table[fd];
588 if (0 == pr->nopen) {
590 rait_debug((stderr, "rait_write:returning %d: %s\n",
596 /* need to be able to slice it up evenly... */
598 data_fds = pr->nfds - 1;
599 if (0 != len % data_fds) {
601 rait_debug((stderr, "rait_write:returning %d: %s\n",
606 /* each slice gets an even portion */
607 len = len / data_fds;
609 /* make sure we have enough buffer space */
610 if (len > (size_t)pr->xorbuflen) {
611 if (0 != pr->xorbuf) {
614 pr->xorbuf = alloc(len);
618 /* compute the sum */
619 memcpy(pr->xorbuf, buf, len);
620 for( i = 1; i < data_fds; i++ ) {
621 for( j = 0; j < len; j++ ) {
622 pr->xorbuf[j] ^= buf[len * i + j];
629 /* write the chunks in the main buffer */
630 for( i = 0; i < data_fds; i++ ) {
631 res = tapefd_write(pr->fds[i], buf + len*i , len);
632 rait_debug((stderr, "rait_write: write(%d,%lx,%d) returns %d%s%s\n",
634 (unsigned long)(buf + len*i),
637 (res < 0) ? ": " : "",
638 (res < 0) ? strerror(errno) : ""));
645 if (total >= 0 && pr->nfds > 1) {
646 /* write the sum, don't include it in the total bytes written */
647 res = tapefd_write(pr->fds[i], pr->xorbuf, len);
648 rait_debug((stderr, "rait_write: write(%d,%lx,%d) returns %d%s%s\n",
650 (unsigned long)pr->xorbuf,
653 (res < 0) ? ": " : "",
654 (res < 0) ? strerror(errno) : ""));
660 rait_debug((stderr, "rait_write:returning %d%s%s\n",
662 (total < 0) ? ": " : "",
663 (total < 0) ? strerror(errno) : ""));
671 ** once again, if there is one data stream do a read, otherwise
672 ** do all n reads, and if one of the first n - 1 fails, compute
673 ** the missing block from the remaining ones, then return the data.
674 ** there's some silliness here for reading tape with bigger buffers
675 ** than we wrote with (thus the extra memmoves down below). On disk, if
676 ** you read with a bigger buffer size than you wrote with, you just
677 ** garble the data...
686 int nerrors, neofs, errorblock;
692 int save_errno = errno;
693 ssize_t maxreadres = 0;
694 int sum_mismatch = 0;
696 rait_debug((stderr, "rait_read(%d,%lx,%d)\n",fd,(unsigned long)buf,len));
698 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
700 rait_debug((stderr, "rait_read:returning %d: %s\n",
706 pr = &rait_table[fd];
707 if (0 == pr->nopen) {
709 rait_debug((stderr, "rait_read:returning %d: %s\n",
718 /* once again, we slice it evenly... */
720 data_fds = pr->nfds - 1;
721 if (0 != len % data_fds) {
723 rait_debug((stderr, "rait_read:returning %d: %s\n",
728 len = len / data_fds;
733 /* try all the reads, save the result codes */
734 /* count the eof/errors */
735 for( i = 0; i < data_fds; i++ ) {
736 pr->readres[i] = tapefd_read(pr->fds[i], buf + len*i , len);
737 rait_debug((stderr, "rait_read: read on fd %d returns %d%s%s\n",
740 (pr->readres[i] < 0) ? ": " : "",
741 (pr->readres[i] < 0) ? strerror(errno) : ""));
742 if ( pr->readres[i] <= 0 ) {
743 if ( pr->readres[i] == 0 ) {
752 } else if (pr->readres[i] > maxreadres) {
753 maxreadres = pr->readres[i];
757 /* make sure we have enough buffer space */
758 if (len > (size_t)pr->xorbuflen) {
759 if (0 != pr->xorbuf) {
762 pr->xorbuf = alloc(len);
765 pr->readres[i] = tapefd_read(pr->fds[i], pr->xorbuf , len);
766 rait_debug((stderr, "rait_read: read on fd %d returns %d%s%s\n",
769 (pr->readres[i] < 0) ? ": " : "",
770 (pr->readres[i] < 0) ? strerror(errno) : ""));
774 * Make sure all the reads were the same length
776 for (j = 0; j < (size_t)pr->nfds; j++) {
777 if (pr->readres[j] != maxreadres) {
784 * If no errors, check that the xor sum matches
786 if ( nerrors == 0 && pr->nfds > 1 ) {
787 for(i = 0; i < (int)maxreadres; i++ ) {
789 for(j = 0; (j + 1) < (size_t)pr->nfds; j++) {
790 sum ^= (buf + len * j)[i];
792 if (sum != pr->xorbuf[i]) {
799 ** now decide what "really" happened --
800 ** all n getting eof is a "real" eof
801 ** just one getting an error/eof is recoverable if we are doing RAIT
802 ** anything else fails
805 if (neofs == pr->nfds) {
806 rait_debug((stderr, "rait_read:returning 0\n"));
812 rait_debug((stderr, "rait_read:returning %d: %s\n",
814 "XOR block mismatch"));
818 if (nerrors > 1 || (pr->nfds <= 1 && nerrors > 0)) {
820 rait_debug((stderr, "rait_read:returning %d: %s\n",
827 ** so now if we failed on a data block, we need to do a recovery
828 ** if we failed on the xor block -- who cares?
830 if (nerrors == 1 && pr->nfds > 1 && errorblock != pr->nfds-1) {
832 rait_debug((stderr, "rait_read: fixing data from fd %d\n",
833 pr->fds[errorblock]));
835 /* the reads were all *supposed* to be the same size, so... */
836 pr->readres[errorblock] = maxreadres;
838 /* fill it in first with the xor sum */
839 memcpy(buf + len * errorblock, pr->xorbuf, len);
841 /* xor back out the other blocks */
842 for( i = 0; i < data_fds; i++ ) {
843 if( i != errorblock ) {
844 for( j = 0; j < len ; j++ ) {
845 buf[j + len * errorblock] ^= buf[j + len * i];
849 /* there, now the block is back as if it never failed... */
852 /* pack together partial reads... */
853 total = pr->readres[0];
854 for( i = 1; i < data_fds; i++ ) {
855 if (total != (ssize_t)(len * i)) {
856 memmove(buf + total, buf + len*i, (size_t)pr->readres[i]);
858 total += pr->readres[i];
861 rait_debug((stderr, "rait_read:returning %d%s%s\n",
863 (total < 0) ? ": " : "",
864 (total < 0) ? strerror(errno) : ""));
881 rait_debug((stderr, "rait_ioctl(%d,%d)\n",fd,op));
883 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
885 rait_debug((stderr, "rait_ioctl:returning %d: %s\n",
891 pr = &rait_table[fd];
892 if (0 == pr->nopen) {
894 rait_debug((stderr, "rait_ioctl:returning %d: %s\n",
900 for( i = 0; i < pr->nfds ; i++ ) {
902 res = ioctl(pr->fds[i], op, p);
913 rait_debug((stderr, "rait_ioctl: returning %d%s%s\n",
915 (res < 0) ? ": " : "",
916 (res < 0) ? strerror(errno) : ""));
922 ** access() all the devices, returning if any fail
930 char *dev_left; /* string before { */
931 char *dev_right; /* string after } */
932 char *dev_next; /* string inside {} */
933 char *dev_real; /* parsed device name */
935 /* copy and parse the dev string so we can scribble on it */
936 devname = stralloc(devname);
938 rait_debug((stderr, "rait_access:returning %d: %s\n",
940 "out of stralloc memory"));
943 if ( 0 != tapeio_init_devname(devname, &dev_left, &dev_right, &dev_next)) {
944 rait_debug((stderr, "rait_access:returning %d: %s\n",
950 while( 0 != (dev_real = tapeio_next_devname(dev_left, dev_right, &dev_next))) {
951 res = tape_access(dev_real, flags);
952 rait_debug((stderr,"rait_access:access( %s, %d ) yields %d\n",
953 dev_real, flags, res ));
961 rait_debug((stderr, "rait_access: returning %d%s%s\n",
963 (res < 0) ? ": " : "",
964 (res < 0) ? strerror(errno) : ""));
970 ** stat all the devices, returning the last one unless one fails
978 char *dev_left; /* string before { */
979 char *dev_right; /* string after } */
980 char *dev_next; /* string inside {} */
981 char *dev_real; /* parsed device name */
983 /* copy and parse the dev string so we can scribble on it */
984 devname = stralloc(devname);
986 rait_debug((stderr, "rait_access:returning %d: %s\n",
988 "out of stralloc memory"));
991 if ( 0 != tapeio_init_devname(devname, &dev_left, &dev_right, &dev_next)) {
992 rait_debug((stderr, "rait_access:returning %d: %s\n",
998 while( 0 != (dev_real = tapeio_next_devname(dev_left, dev_right, &dev_next))) {
999 res = tape_stat(dev_real, buf);
1000 rait_debug((stderr,"rait_stat:stat( %s ) yields %d (%s)\n",
1001 dev_real, res, (res != 0) ? strerror(errno) : "no error" ));
1009 rait_debug((stderr, "rait_access: returning %d%s%s\n",
1011 (res < 0) ? ": " : "",
1012 (res < 0) ? strerror(errno) : ""));
1031 t1 = rait_open(f1,O_RDONLY,0644);
1035 t2 = rait_open(f2,O_CREAT|O_RDWR,0644);
1038 (void)rait_close(t1);
1042 buf = alloc(buflen);
1044 len = rait_read(t1,buf,buflen);
1046 wres = rait_write(t2, buf, (size_t)len);
1055 (void)rait_close(t1);
1056 (void)rait_close(t2);
1058 return (len < 0) ? -1 : 0;
1064 ** Amanda Tape API routines:
1070 int (*func1)(int, off_t),
1080 rait_debug((stderr, "rait_tapefd_ioctl(%d,%d)\n",fd,count));
1082 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
1084 rait_debug((stderr, "rait_tapefd_ioctl:returning %d: %s\n",
1090 pr = &rait_table[fd];
1091 if (0 == pr->nopen) {
1093 rait_debug((stderr, "rait_tapefd_ioctl:returning %d: %s\n",
1099 if (0 == pr->readres && 0 < pr->nfds) {
1100 pr->readres = alloc(pr->nfds * SIZEOF(*pr->readres));
1101 memset(pr->readres, 0, pr->nfds * SIZEOF(*pr->readres));
1104 for( i = 0; i < pr->nfds ; i++ ) {
1105 if(tapefd_can_fork(pr->fds[i])) {
1106 if ((kid = fork()) < 1) {
1107 rait_debug((stderr, "in kid, fork returned %d\n", kid));
1108 /* if we are the kid, or fork failed, do the action */
1109 if (func0 != NULL) {
1110 res = (*func0)(pr->fds[i]);
1112 res = (*func1)(pr->fds[i], count);
1114 rait_debug((stderr, "in kid, func (%d) returned %d errno %s\n",
1115 pr->fds[i], res, strerror(errno)));
1119 rait_debug((stderr, "in parent, fork returned %d\n", kid));
1120 pr->readres[i] = (ssize_t)kid;
1125 j = (*func0)(pr->fds[i]);
1127 j = (*func1)(pr->fds[i], count);
1132 pr->readres[i] = -1;
1135 for( i = 0; i < pr->nfds ; i++ ) {
1136 if(tapefd_can_fork(pr->fds[i])) {
1137 rait_debug((stderr, "in parent, waiting for %d\n", pr->readres[i]));
1138 waitpid((pid_t)pr->readres[i], &status, 0);
1139 if( WEXITSTATUS(status) != 0 ) {
1140 res = WEXITSTATUS(status);
1144 rait_debug((stderr, "in parent, return code was %d\n", res));
1155 rait_debug((stderr, "rait_tapefd_ioctl: returning %d%s%s\n",
1157 (res < 0) ? ": " : "",
1158 (res < 0) ? strerror(errno) : ""));
1168 return rait_tapefd_ioctl(NULL, tapefd_fsf, fd, count);
1175 return rait_tapefd_ioctl(tapefd_rewind, NULL, fd, (off_t)-1);
1182 return rait_tapefd_ioctl(tapefd_unload, NULL, fd, (off_t)-1);
1190 return rait_tapefd_ioctl(NULL, tapefd_weof, fd, count);
1199 return rait_open(name, flags, mask);
1205 struct am_mt_status *stat)
1212 rait_debug((stderr, "rait_tapefd_status(%d)\n",fd));
1214 if ((fd < 0) || ((size_t)fd >= rait_table_count)) {
1216 rait_debug((stderr, "rait_tapefd_status:returning %d: %s\n",
1222 pr = &rait_table[fd];
1223 if (0 == pr->nopen) {
1225 rait_debug((stderr, "rait_tapefd_status:returning %d: %s\n",
1231 for( i = 0; i < pr->nfds ; i++ ) {
1232 res = tapefd_status(pr->fds[i], stat);
1244 rait_tapefd_resetofs(
1247 (void)rait_lseek(fd, (off_t)0, SEEK_SET);
1251 rait_tapefd_can_fork(
1254 (void)fd; /* Quiet unused parameter warning */