git.gag.com Git - debian/amanda/blob - perl/Amanda/Xfer.pod

   1 /*
   2  * Copyright (c) 2009-2012 Zmanda, Inc.  All Rights Reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License
   6  * as published by the Free Software Foundation; either version 2
   7  * of the License, or (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with this program; if not, write to the Free Software Foundation, Inc.,
  16  * 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  17  *
  18  * Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  19  * Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  20  */
  21
  22 %perlcode %{
  23
  24 =head1 NAME
  25
  26 Amanda::Xfer - the transfer architecture
  27
  28 =head1 SYNOPSIS
  29
  30   use Amanda::MainLoop;
  31   use Amanda::Xfer qw( :constants );
  32   use POSIX;
  33
  34   my $infd = POSIX::open("input", POSIX::O_RDONLY, 0);
  35   my $outfd = POSIX::open("output", POSIX::O_CREAT|POSIX::O_WRONLY, 0640);
  36   my $xfer = Amanda::Xfer->new([
  37     Amanda::Xfer::Source::Fd->new($infd),
  38     Amanda::Xfer::Dest::Fd->new($outfd)
  39   ]);
  40   $xfer->start(sub {
  41       my ($src, $xmsg, $xfer) = @_;
  42       print "Message from $xfer: $xmsg\n"; # use stringify operations
  43       if ($xmsg->{'type'} == $XMSG_DONE) {
  44           Amanda::MainLoop::quit();
  45       }
  46   }, 0, 0);
  47   Amanda::MainLoop::run();
  48
  49 See L<http://wiki.zmanda.com/index.php/XFA> for background on the
  50 transfer architecture.
  51
  52 =head1 Amanda::Xfer Objects
  53
  54 A new transfer is created with C<< Amanda::Xfer->new() >>, which takes
  55 an arrayref giving the transfer elements which should compose the
  56 transfer.
  57
  58 The resulting object has the following methods:
  59
  60 =over
  61
  62 =item start($cb, $offset, $size)
  63
  64 Start this transfer.  It transfers $size bytes starting from offset $offset.
  65 $offset must be 0. $size is only supported by Amanda::Xfer::Source::Recovery.
  66 A size of 0 transfers everything up to EOF.
  67 Processing takes place asynchronously, and messages will
  68 begin queueing up immediately.  If C<$cb> is given, then it is installed as the
  69 callback for messages from this transfer.  The callback receives three
  70 arguments: the event source, the message, and a reference to the controlling
  71 transfer.  See the description of C<Amanda::Xfer::Msg>, below, for details.
  72
  73 There is no need to remove the source on completion of the transfer - that is
  74 handled for you.
  75
  76 =item cancel()
  77
  78 Stop transferring data.  The transfer will send an C<XMSG_CANCEL>,
  79 "drain" any buffered data as best it can, and then complete normally
  80 with an C<XMSG_DONE>.
  81
  82 =item get_status()
  83
  84 Get the transfer's status.  The result will be one of C<$XFER_INIT>,
  85 C<$XFER_START>, C<$XFER_RUNNING>, or C<$XFER_DONE>.  These symbols are
  86 available for import with the tag C<:constants>.
  87
  88 =item repr()
  89
  90 Return a string representation of this transfer, suitable for use in
  91 debugging messages.  This method is automatically invoked when a
  92 transfer is interpolated into a string:
  93
  94   print "Starting $xfer\n";
  95
  96 =item get_source()
  97
  98 Get the L<Amanda::MainLoop> event source through which messages will
  99 be delivered for this transfer.  Use its C<set_callback> method to
 100 connect a perl sub for processing events.
 101
 102 Use of this method is deprecated; instead, pass a callback to the C<start>
 103 method.  If you set a callback via C<get_source>, then you I<must> C<remove>
 104 the source when the transfer is complete!
 105
 106 =back
 107
 108 =head1 Amanda::Xfer::Element objects
 109
 110 The individual transfer elements that compose a transfer are instances
 111 of subclasses of Amanda::Xfer::Element.  All such objects have a
 112 C<repr()> method, similar to that for transfers, and support a similar
 113 kind of string interpolation.
 114
 115 Note that the names of these classes contain the words "Source",
 116 "Filter", and "Dest".  This is merely suggestive of their intended
 117 purpose -- there are no such abstract classes.
 118
 119 =head2 Transfer Sources
 120
 121 =head3 Amanda::Xfer::Source::Device (SERVER ONLY)
 122
 123   Amanda::Xfer::Source::Device->new($device);
 124
 125 This source reads data from a device.  The device should already be
 126 queued up for reading (C<< $device->seek_file(..) >>).  The element
 127 will read until the end of the device file.
 128
 129 =head3 Amanda::Xfer::Source::Fd
 130
 131   Amanda::Xfer::Source::Fd->new(fileno($fh));
 132
 133 This source reads data from a file descriptor.  It reads until EOF,
 134 but does not close the descriptor.  Be careful not to let Perl close
 135 the file for you!
 136
 137 =head3 Amanda::Xfer::Source::Holding (SERVER-ONLY)
 138
 139   Amanda::Xfer::Source::Holding->new($filename);
 140
 141 This source reads data from a holding file (see L<Amanda::Holding>).
 142 If the transfer only consists of a C<Amanda::Xfer::Source::Holding>
 143 and an C<Amanda::Xfer::Dest::Taper::Cacher> (with no filters), then the source
 144 will call the destination's C<cache_inform> method so that it can use
 145 holding chunks for a split-part cache.
 146
 147 =head3 Amanda::Xfer::Source::Random
 148
 149   Amanda::Xfer::Source::Random->new($length, $seed);
 150
 151 This source provides I<length> bytes of random data (or an unlimited
 152 amount of data if I<length> is zero).  C<$seed> is the seed used to
 153 generate the random numbers; this seed can be used in a destination to
 154 check for correct output.
 155
 156 If you need to string multiple transfers together into a coherent sequence of
 157 random numbers, for example when testing the re-assembly of spanned dumps, call
 158
 159   my $seed = $src->get_seed();
 160
 161 to get the finishing seed for the source, then pass this to the source
 162 constructor for the next transfer.  When concatenated, the bytestreams from the
 163 transfers will verify correctly using the original random seed.
 164
 165 =head3 Amanda::Xfer::Source::Pattern
 166
 167   Amanda::Xfer::Source::Pattern->new($length, $pattern);
 168
 169 This source provides I<length> bytes containing copies of
 170 I<pattern>. If I<length> is zero, the source provides an unlimited
 171 number of bytes.
 172
 173 =head3 Amanda::Xfer::Source::Recovery (SERVER ONLY)
 174
 175   Amanda::Xfer::Source::Recovery->new($first_device);
 176
 177 This source reads a datastream composed of on-device files.  Its constructor
 178 takes a pointer to the first device that will be read from; this is used
 179 internally to determine whether DirectTCP is supported.
 180
 181 The element sends C<$XMSG_READY> when it is ready for the first C<start_part>
 182 invocation.  Don't do anything with the device between the start of the
 183 transfer and when the element sends an C<$XMSG_READY>.
 184
 185 The element contains no logic to decide I<which> files to assemble into the
 186 datastream; instead, it relies on the caller to supply pre-positioned devices:
 187
 188   $src->start_part($device);
 189
 190 Once C<start_part> is called, the source will read until C<$device> produces an
 191 EOF.  As each part is completed, the element sends an C<$XMSG_PART_DONE>
 192 L<Amanda::Xfer::Msg>, with the following keys:
 193
 194  size       bytes read from the device
 195  duration   time spent reading
 196  fileno     the on-media file number from which the part was read
 197
 198 Call C<start_part> with C<$device = undef> to indicate that there are no more
 199 parts.
 200
 201 To switch to a new device in mid-transfer, use C<use_device>:
 202
 203   $src->use_device($device);
 204
 205 This method must be called with a device that is not yet started, and thus must
 206 be called before the C<start_part> method is called with a new device.
 207
 208 =head3 Amanda::Xfer::Source::DirectTCPListen
 209
 210   Amanda::Xfer::Source::DirectTCPListen->new();
 211
 212 This source is for use when the transfer data will come in via DirectTCP, with
 213 the data's I<source> connecting to the data's I<destination>.  That is, the
 214 data source is the connection initiator.  Set up the transfer, and after
 215 starting it, call this element's C<get_addrs> method to get an arrayref of ip/port pairs,
 216 e.g., C<[ "192.168.4.5", 9924 ]>, all of which are listening for an incoming
 217 data connection.  Once a connection arrives, this element will read data from
 218 it and send those data into the transfer.
 219
 220   my $addrs = $src->get_addrs();
 221
 222 =head3 Amanda::Xfer::Source::DirectTCPConnect
 223
 224   Amanda::Xfer::Source::DirectTCPConnect->new($addrs);
 225
 226 This source is for use when the transfer data will come in via DirectTCP, with
 227 the data's I<destination> connecting to the data's I<source>.  That is, the
 228 data destination is the connection initiator.  The element connects to
 229 C<$addrs> and reads the transfer data from the connection.
 230
 231 =head2 Transfer Filters
 232
 233 =head3 Amanda::Xfer::Filter::Process
 234
 235   $xfp = Amanda::Xfer::Filter::Process->new([@args], $need_root);
 236
 237 This filter will pipe data through the standard file descriptors of the
 238 subprocess specified by C<@args>.  If C<$need_root> is true, it will attempt to
 239 change to uid 0 before executing the process.  Note that the process is
 240 invoked directly, not via a shell, so shell metacharacters (e.g., C<< 2>&1 >>)
 241 will not function as expected. This method creates a pipe for the process's
 242 stderr, and the caller must read from it or a hang may occur.
 243
 244   $xfp->get_stderr_fd()
 245
 246 Return the file descriptor of the stderr pipe to read from.
 247
 248 =head3 Amanda::Xfer::Filter::Xor
 249
 250   Amanda::Xfer::Filter::Xor->new($key);
 251
 252 This filter applies a bytewise XOR operation to the data flowing
 253 through it.
 254
 255 =head2 Transfer Destinations
 256
 257 =head3 Amanda::Xfer::Dest::Device (SERVER ONLY)
 258
 259   Amanda::Xfer::Dest::Device->new($device, $cancel_at_eom);
 260
 261 This destination writes data to a device.  The device should be ready for writing
 262 (C<< $device->start_file(..) >>).  On completion of the transfer, the file will
 263 be finished.  If an error occurs, or if C<$cancel_at_eom> is true and the
 264 device signals LEOM, the transfer will be cancelled.
 265
 266 Note that this element does not apply any sort of stream buffering.
 267
 268 =head3 Amanda::Xfer::Dest::Buffer
 269
 270   Amanda::Xfer::Dest::Buffer->new($max_size);
 271
 272 This destination records data into an in-memory buffer which can grow up to
 273 C<$max_size> bytes.  The buffer is available with the C<get> method, which
 274 returns a copy of the buffer as a perl scalar:
 275
 276     my $buf = $xdb->get();
 277
 278 =head3 Amanda::Xfer::Dest::DirectTCPListen
 279
 280   Amanda::Xfer::Dest::DirectTCPListen->new();
 281
 282 This destination is for use when the transfer data will come in via DirectTCP,
 283 with the data's I<destination> connecting to the data's I<source>.  That is,
 284 the data destination is the connection initiator.  Set up the transfer, and
 285 after starting it, call this element's C<get_addrs> method to get an arrayref
 286 of ip/port pairs, e.g., C<[ "192.168.4.5", 9924 ]>, all of which are listening
 287 for an incoming data connection.  Once a connection arrives, this element will
 288 write the transfer data to it.
 289
 290   my $addrs = $dest->get_addrs();
 291
 292 =head3 Amanda::Xfer::Dest::DirectTCPConnect
 293
 294   Amanda::Xfer::Dest::DirectTCPConnect->new($addrs);
 295
 296 This destination is for use when the transfer data will come in via DirectTCP,
 297 with the data's I<source> connecting to the data's I<destination>.  That
 298 is, the data source is the connection initiator.  The element connects to
 299 C<$addrs> and writes the transfer data to the connection.
 300
 301 =head3 Amanda::Xfer::Dest::Fd
 302
 303   Amanda::Xfer::Dest::Fd->new(fileno($fh));
 304
 305 This destination writes data to a file descriptor.  The file is not
 306 closed after the transfer is completed.  Be careful not to let Perl
 307 close the file for you!
 308
 309 =head3 Amanda::Xfer::Dest::Null
 310
 311   Amanda::Xfer::Dest::Null->new($seed);
 312
 313 This destination discards the data it receives.  If C<$seed> is
 314 nonzero, then the element will validate that it receives the data that
 315 C<Amanda::Xfer::Source::Random> produced with the same seed.  No
 316 validation is performed if C<$seed> is zero.
 317
 318 =head3 Amanda::Xfer::Dest::Taper (SERVER ONLY)
 319
 320 This is the parent class to C<Amanda::Xfer::Dest::Taper::Cacher> and
 321 C<Amanda::Xfer::Dest::Taper::DirectTCP>. These subclasses allow a single
 322 transfer to write to multiple files (parts) on a device, and even spread those
 323 parts over multiple devices, without interrupting the transfer itself.
 324
 325 The subclass constructors all take a C<$first_device>, which should be
 326 configured but not yet started; and a C<$part_size> giving the maximum size of
 327 each part.  Note that this value may be rounded up internally as necessary.
 328
 329 When a transfer using a taper destination element is first started, no data is
 330 transferred until the element's C<start_part> method is called:
 331
 332   $dest->start_part($retry_part);
 333
 334 where C<$device> is the device to which the part should be written.  The device
 335 should have a file open and ready to write (that is,
 336 C<< $device->start_file(..) >> has already been called).  If C<$retry_part> is
 337 true, then the previous, unsuccessful part will be retried.
 338
 339 As each part is completed, the element sends an C<$XMSG_PART_DONE>
 340 C<Amanda::Xfer::Msg>, with the following keys:
 341
 342  successful true if the part was written successfully
 343  eof        recipient should not call start_part again
 344  eom        this volume is at EOM; a new volume is required
 345  size       bytes written to volume
 346  duration   time spent writing, not counting changer ops, etc.
 347  partnum    the zero-based number of this part in the overall dumpfile
 348  fileno     the on-media file number used for this part, or 0 if no file
 349             was used
 350
 351 If C<eom> is true, then the caller should find a new volume before
 352 continuing.  If C<eof> is not true, then C<start_part> should be called
 353 again, with C<$retry_part = !successful>.  Note that it is possible
 354 for some destinations to write a portion of a part successfully,
 355 but still stop at EOM.  That is, C<eom> does not necessarily imply
 356 C<!successful>.
 357
 358 To switch to a new device in mid-transfer, use C<use_device>:
 359
 360   $dest->use_device($device);
 361
 362 This method must be called with a device that is not yet started.
 363
 364 If neither the memory nor disk caches are in use, but the dumpfile is
 365 available on disk, then the C<cache_inform> method allows the element
 366 to use that on-disk data to support retries.  This is intended to
 367 support transfers from Amanda's holding disk (see
 368 C<Amanda::Xfer::Source::Holding>), but may be useful for other
 369 purposes.
 370
 371   $dest->cache_inform($filename, $offset, $length);
 372
 373 This function indicates that C<$filename> contains C<$length> bytes of
 374 data, beginning at offset C<$offset> from the beginning of the file.
 375 These bytes are assumed to follow immediately after any bytes
 376 previously specified to C<cache_inform>.  That is, no gaps or overlaps
 377 are allowed in the data stream described to C<cache_inform>.
 378 Furthermore, the location of each byte must be specified to this
 379 method I<before> it is sent through the transfer.
 380
 381   $dest->get_part_bytes_written();
 382
 383 This function returns the number of bytes written for the current part
 384 to the device.
 385
 386 =head3 Amanda::Xfer::Dest::Taper::Splitter
 387
 388   Amanda::Xfer::Dest::Taper::Splitter->new($first_device, $max_memory,
 389                         $part_size, $expect_cache_inform);
 390
 391 This class splits a data stream into parts on the storage media.  It is for use
 392 when the device supports LEOM, when the dump is already available on disk
 393 (C<cache_inform>), or when no caching is desired.  It does not cache parts, so
 394 it can only retry a partial part if the transfer source is calling
 395 C<cache_inform>.  If the element is used with devices that do not support LEOM,
 396 then it will cancel the entire transfer if the device reaches EOM and
 397 C<cache_inform> is not in use.  Set C<$expect_cache_inform> appropriately based
 398 on the incoming data.
 399
 400 The C<$part_size> and C<$first_device> parameters are described above for
 401 C<Amanda::Xfer::Dest::Taper>.
 402
 403 =head3 Amanda::Xfer::Dest::Taper::Cacher
 404
 405   Amanda::Xfer::Dest::Taper::Cacher->new($first_device, $max_memory,
 406                         $part_size, $use_mem_cache, $disk_cache_dirname);
 407
 408 This class is similar to the splitter, but caches data from each part in one of
 409 a variety of ways to support "rewinding" to retry a failed part (e.g., one that
 410 does not fit on a device).  It assumes that when a device reaches EOM while
 411 writing, the entire on-volume file is corrupt - that is, that the device does
 412 not support logical EOM.  The class does not support C<cache_inform>.
 413
 414 The C<$part_size> and C<$first_device> parameters are described above for
 415 C<Amanda::Xfer::Dest::Taper>.
 416
 417 If C<$use_mem_cache> is true, each part will be cached in memory (using
 418 C<$part_size> bytes of memory; plan accordingly!).  If C<$disk_cache_dirname>
 419 is defined, then each part will be cached on-disk in a file in this directory.
 420 It is an error to specify both in-memory and on-disk caching.  If neither
 421 option is specified, the element will operate successfully, but will not be
 422 able to retry a part, and will cancel the transfer if a part fails.
 423
 424 =head3 Amanda::Xfer::Dest::Taper::DirectTCP
 425
 426   Amanda::Xfer::Dest::Taper::DirectTCP->new($first_device, $part_size);
 427
 428 This class uses the Device API DirectTCP methods to write data to a device via
 429 DirectTCP.  Since all DirectTCP devices support logical EOM, this class does
 430 not cache any data, and will never re-start an unsuccessful part.
 431
 432 As stated above, C<$first_device> must not be started when C<new> is called.
 433 Furthermore, no use of that device is allowed until the element sends an
 434 C<$XMSG_READY> to indicate that it is finished with the device.  The
 435 C<start_part> method must not be called until this message is received either.
 436
 437 =head1 Amanda::Xfer::Msg objects
 438
 439 Messages are simple hashrefs, with a few convenience methods.  Like
 440 transfers, they have a C<repr()> method that formats the message
 441 nicely, and is available through string interpolation:
 442
 443   print "Received message $msg\n";
 444
 445 The canonical description of the message types and keys is in
 446 C<xfer-src/xmsg.h>, and is not duplicated here.  Every message has the
 447 following basic keys.
 448
 449 =over
 450
 451 =item type
 452
 453 The message type -- one of the C<xmsg_type> constants available from
 454 the import tag C<:constants>.
 455
 456 =item elt
 457
 458 The transfer element that sent the message.
 459
 460 =item version
 461
 462 The version of the message.  This is used to support extensibility of
 463 the protocol.
 464
 465 =back
 466
 467 Additional keys are described in the documentation for the elements
 468 that use them.  All keys are listed in C<xfer-src/xmsg.h>.
 469
 470 =cut
 471
 472
 473 %}