git.gag.com Git - debian/amanda/blob - perl/Amanda/Recovery/Planner.pm

   1 # Copyright (c) 2010-2012 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This library is free software; you can redistribute it and/or
   4 # modify it under the terms of the GNU Lesser General Public
   5 # License as published by the Free Software Foundation; either
   6 # version 2.1 of the License, or (at your option) any later version.
   7 #
   8 # This library is distributed in the hope that it will be useful, but
   9 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  11 # License for more details.
  12 #
  13 # You should have received a copy of the GNU Lesser General Public License
  14 # along with this library; if not, write to the Free Software Foundation,
  15 # Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA.
  16 #
  17 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  18 # Sunnyvale, CA 94086, USA, or: http://www.zmanda.com
  19
  20 =head1 NAME
  21
  22 Amanda::Recovery::Planner - use the catalog to plan recoveries
  23
  24 =head1 SYNOPSIS
  25
  26     my $plan;
  27
  28     $subs{'make_plan'} = make_cb(make_plan => sub {
  29         Amanda::Recovery::Planner::make_plan(
  30             dumpspecs => [ $ds1, $ds2 ],
  31             algorithm => $algo,
  32             changer => $changer,
  33             plan_cb => $subs{'plan_cb'});
  34     });
  35
  36     $subs{'plan_cb'} = make_cb(plan_cb => sub {
  37         my ($err, $pl) = @_;
  38         die $err if $err;
  39
  40         $plan = $pl;
  41         $subs{'start_next_dumpfile'}->();
  42     });
  43
  44     $subs{'start_next_dumpfile'} = make_cb(start_next_dumpfile => sub {
  45         my $dump = shift @{$plan->{'dumps'}};
  46         if (!$dump) {
  47             # .. all done!
  48         }
  49
  50         print "recovering ", $dump->{'hostname'}, " ", $dump->{'diskname'}, "\n";
  51         $clerk->get_xfer_src( .. dump => $dump .. );
  52         # ..
  53     });
  54
  55 =head1 OVERVIEW
  56
  57 This package determines the optimal way to recover dump files from storage.
  58 Its function is superficially fairly simple: given a collection of desired
  59 dumpfiles, it returns a Plan to recover those dumpfiles, specifying exactly the
  60 volumes and files that are needed, and the order in which they should be
  61 accessed.
  62
  63 =head2 ALGORITHMS
  64
  65 Several algorithms will soon be available for selecting volumes when a dumpfile
  66 appears in several places (e.g., from an amvault operation).  At the moment,
  67 the algorithm argument should be omitted, as this will eventually indicate that
  68 the user-configured algorithm should be applied.
  69
  70 =head2 INSTANTIATING A PLAN
  71
  72 For most purposes, you should call C<make_plan> with the desired dumpspecs, a
  73 changer, and a callback:
  74
  75     Amanda::Recovery::Planner::make_plan(
  76         dumpspecs => [ $ds1, $ds2, .. ],
  77         changer => $chg,
  78         plan_cb => $plan_cb);
  79
  80 As a shortcut, you may also specify a single dumpspec:
  81
  82     Amanda::Recovery::Planner::make_plan(
  83         dumpspec => $ds,
  84         changer => $chg,
  85         plan_cb => $plan_cb);
  86
  87 Note that in this case, the resulting plan may contain more than one dump, if
  88 the dumpspec was not unambiguous.
  89
  90 To select the planner algorithm, pass an C<algorithm> argument.  This argument
  91 is currently ignored and should be omitted.  If the optional argument C<debug>
  92 is given with a true value, then the Planner will log additional debug
  93 information to the Amanda debug logs.  Debugging is automatically enabled if
  94 the C<DEBUG_RECOVERY> configuration parameter is set to anything greater than
  95 1.
  96
  97 The optional argument C<one_dump_per_part> will create a "no-reassembly" plan,
  98 where each part appears as the only part in a unique dump.  The dump objects
  99 will have the key C<single_part> set to 1.
 100
 101 The C<plan_cb> is called with two arguments:
 102
 103     $plan_cb->($err, $plan);
 104
 105 If C<$err> is defined, it describes an error that occurred; otherwise, C<$plan>
 106 is the generated plan, as described below.
 107
 108 Some algorithms may consult the changer's inventory to determine what volumes
 109 are available.  It is because of this asynchronous operation that C<make_plan>
 110 takes a callback instead of simply returning the plan.
 111
 112 =head3 Pre-defined Plans
 113
 114 In some cases, you already know exactly where the data is, and just need a
 115 proper plan object to hand to L<Amanda::Recovery::Clerk>.  One such case is a
 116 recovery from a holding file.  In this case, use C<make_plan> like this:
 117
 118     Amanda::Recovery::Planner::make_plan(
 119         holding_file => $hf,
 120         dumpspec => $ds,
 121         plan_cb => $plan_cb);
 122
 123 This will create a plan to recover the data in C<$hf>.  The dumpspec is
 124 optional, but if present will be used to verify that the holding file contains
 125 the appropriate dump.
 126
 127 Similarly, if you have a list of label:fileno pairs to use, call C<make_plan>
 128 like this:
 129
 130     Amanda::Recovery::Planner::make_plan(
 131         filelist => [
 132             $label => [ $filenum, $filenum, .. ],
 133             $label => ..
 134         ],
 135         dumpspec => $ds,
 136         plan_cb => $plan_cb);
 137
 138 This will verify the requested files against the catalog and the dumpspec, then
 139 hand back a plan that essentially embodies C<filelist>.
 140
 141 Note that both of these functions will only create a single-dump plan.
 142
 143 =head2 PLANS
 144
 145 A Plan is a perl object describing the process for recovering zero or more
 146 dumpfiles.  Its principal components are dumps, in order, that are to be
 147 recovered, but the object presents some other interfaces that return useful
 148 information about the plan.
 149
 150 The C<'dumps'> key holds the list of dumps, in the order they should be
 151 performed.  Callers should shift dumps off this list to present to the Clerk.
 152
 153 To get a list of volumes that the plan requires, in order, use
 154 C<get_volume_list>.  Each volume is represented as a hash:
 155
 156   { label => 'DATA182', available => 1 }
 157
 158 where C<available> is false if the planner did not find this volume in the
 159 changer.  Planners which do not consult the changer will have a false value for
 160 C<available>.
 161
 162 Similarly, to get a list of holding files that the plan requires, in order, use
 163 C<get_holding_file_list>.  Each file is represented as a string giving the
 164 fully qualified pathname.
 165
 166 =cut
 167
 168 package Amanda::Recovery::Planner;
 169
 170 use strict;
 171 use warnings;
 172 use Carp;
 173
 # Public entry point: construct a Plan object from the caller's named
 # arguments and hand off to the Plan method that matches the request.
 #
 # Recognized arguments read here:
 #   dumpspec          - shortcut; wrapped into a one-element 'dumpspecs' list
 #   algorithm, changer, debug, one_dump_per_part
 #                     - stored on the Plan as algo, chg, debug and
 #                       one_dump_per_part respectively
 #   holding_file      - if present, a holding-file plan is made
 #   filelist          - otherwise, if present, a plan from a file list is made
 #
 # All arguments (including the synthesized 'dumpspecs') are forwarded to the
 # selected Plan method, whose return value is returned.
 sub make_plan {
     my (%args) = @_;

     # normalize the single-dumpspec shortcut into the list form
     if (exists $args{'dumpspec'}) {
         $args{'dumpspecs'} = [ $args{'dumpspec'} ];
     }

     my %plan_fields = (
         algo => $args{'algorithm'},
         chg => $args{'changer'},
         debug => $args{'debug'},
         one_dump_per_part => $args{'one_dump_per_part'},
     );
     my $plan = Amanda::Recovery::Planner::Plan->new(\%plan_fields);

     # pre-defined plans take precedence over a catalog search
     return $plan->make_holding_plan(%args)
         if exists $args{'holding_file'};
     return $plan->make_plan_from_filelist(%args)
         if exists $args{'filelist'};
     return $plan->make_plan(%args);
 }
 195
 196 package Amanda::Recovery::Planner::Plan;
 197
 198 use strict;
 199 use warnings;
 200 use Data::Dumper;
 201 use Carp;
 202
 203 use Amanda::Device qw( :constants );
 204 use Amanda::Holding;
 205 use Amanda::Header;
 206 use Amanda::Config qw( :getconf config_dir_relative );
 207 use Amanda::Debug qw( :logging );
 208 use Amanda::MainLoop;
 209 use Amanda::DB::Catalog;
 210 use Amanda::Tapelist;
 211
 # Constructor.  Blesses the supplied hash reference into $class.
 #
 # The 'debug' field is raised to $Amanda::Config::debug_recovery when the
 # caller gave no debug level, or gave one lower than the configured level.
 sub new {
     my ($class, $plan) = @_;

     my $configured = $Amanda::Config::debug_recovery;
     my $requested = $plan->{'debug'};

     if (!defined $requested || $configured > $requested) {
         $plan->{'debug'} = $configured;
     }

     return bless($plan, $class);
 }
 222
 # Remove and return the next dump from the head of this plan's 'dumps' list
 # (undef once the list is empty).
 sub shift_dump {
     my ($self) = @_;

     my $next_dump = shift @{ $self->{'dumps'} };
     return $next_dump;
 }
 227
 # Build a plan by searching the catalog for the requested dumpspecs.
 #
 # Named parameters:
 #   changer   - required (only its presence is checked in this method)
 #   plan_cb   - required; scheduled with Amanda::MainLoop::call_later as
 #               plan_cb->(undef, $self) once the plan has been built
 #   dumpspecs - required; handed to Amanda::DB::Catalog::get_dumps
 #
 # Croaks if a required parameter is missing.
 #
 # The matching dumps are grouped by host/disk/dump timestamp/level; one copy
 # of each is chosen (OK preferred over PARTIAL, then the oldest
 # write_timestamp), optionally split into one dump per part, sorted into
 # write order, and stored in $self->{'dumps'}.
 sub make_plan {
     my $self = shift;
     my %params = @_;

     for my $rq_param (qw(changer plan_cb dumpspecs)) {
         croak "required parameter '$rq_param' mising"
             unless exists $params{$rq_param};
     }
     my $dumpspecs = $params{'dumpspecs'};

     # first, get the set of dumps that match these dumpspecs
     my @dumps = Amanda::DB::Catalog::get_dumps(dumpspecs => $dumpspecs);

     # now "bin" those by host/disk/dump_ts/level
     my %dumps;
     for my $dump (@dumps) {
         my $k = join("\0", $dump->{'hostname'}, $dump->{'diskname'},
                            $dump->{'dump_timestamp'}, $dump->{'level'});
         push @{$dumps{$k}}, $dump;
     }

     # now select the "best" of each set of dumps, and put that in @dumps
     @dumps = ();
     for my $options (values %dumps) {
         my @options = @$options;
         # if there's only one option, the choice is easy
         if (@options == 1) {
             push @dumps, $options[0];
             next;
         }

         # if there are several, narrow to those with an OK status or barring that,
         # those with a PARTIAL status.  FAIL need not apply.
         my @ok_options = grep { $_->{'status'} eq 'OK' } @options;
         my @partial_options = grep { $_->{'status'} eq 'PARTIAL' } @options;

         if (@ok_options) {
             @options = @ok_options;
         } elsif (@partial_options) {
             @options = @partial_options;
         } else {
             # None of the copies is usable.  Skip this dump rather than
             # pushing an undefined entry into the plan, which would break the
             # sort below and every consumer of the 'dumps' list.
             my $first = $options->[0];
             $self->dbg("no OK or PARTIAL copy of " .
                 join(" ", map { defined $_ ? $_ : '(undef)' }
                     $first->{'hostname'}, $first->{'diskname'},
                     $first->{'dump_timestamp'}, $first->{'level'}) .
                 "; omitting it from the plan");
             next;
         }

         # now, take the one written longest ago - this gets us the dump on secondary
         # media if it hasn't been overwritten, otherwise the dump on tertiary media,
         # etc.  Note that this also prefers dumps on holding disk, since they are
         # tagged with a write_timestamp of 0
         @options = Amanda::DB::Catalog::sort_dumps(['write_timestamp'], @options);
         push @dumps, $options[0];
     }

     # at this point we have at most one instance of each dump in @dumps.

     # If one_dump_per_part was specified, rearrange @dumps to have a distinct
     # dump object for each part.
     if ($self->{'one_dump_per_part'}) {
         @dumps = $self->split_dumps_per_part(\@dumps);
     }

     # now sort the dumps in order by their constituent parts.  This sorts based
     # on write_timestamp, then on the label of the first part of the dump,
     # using the tapelist to order the labels.  Where labels match, it sorts on
     # the part's filenum.  This should sort the dumps into the order in which
     # they were written, with holding dumps coming in at the head of the list.
     my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
     my $tapelist = Amanda::Tapelist->new($tapelist_filename);

     my $sortfn = sub {
         my $rv;
         my $tle;

         return $rv
             if ($rv = $a->{'write_timestamp'} cmp $b->{'write_timestamp'});

         # above will take care of comparing a holding dump to an on-media dump, but
         # if both are on holding then we need to compare them lexically
         if (exists $a->{'parts'}[1]{'holding_file'}
         and exists $b->{'parts'}[1]{'holding_file'}) {
             return $a->{'parts'}[1]{'holding_file'} cmp $b->{'parts'}[1]{'holding_file'};
         }

         my ($alabel, $blabel) = (
             $a->{'parts'}[1]{'label'},
             $b->{'parts'}[1]{'label'},
         );

         # only consult the tapelist for parts that actually carry a label
         my ($apos, $bpos);
         $apos = $tle->{'position'}
             if (defined $alabel and ($tle = $tapelist->lookup_tapelabel($alabel)));
         $bpos = $tle->{'position'}
             if (defined $blabel and ($tle = $tapelist->lookup_tapelabel($blabel)));
         return ($bpos <=> $apos) # note: reversed for "oldest to newest"
             if defined $bpos && defined $apos && ($bpos <=> $apos);

         # if a tape wasn't in the tapelist, just sort the labels lexically (this
         # really shouldn't happen)
         if (!defined $bpos || !defined $apos) {
             return $alabel cmp $blabel
                 if defined $alabel and defined $blabel and $alabel cmp $blabel ;
         }

         # finally, the dumps are on the same volume, so just sort by filenum
         return $a->{'parts'}[1]{'filenum'} <=> $b->{'parts'}[1]{'filenum'};
     };
     @dumps = sort $sortfn @dumps;

     $self->{'dumps'} = \@dumps;

     Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
 }
 338
 # Build a single-dump plan for one holding file.
 #
 # Named parameters:
 #   holding_file - required; path handed to Amanda::Holding::get_header
 #   plan_cb      - required; result callback
 #   dumpspec     - optional; added to the catalog query when true
 #
 # Croaks if a required parameter is missing.  On failure plan_cb is invoked
 # directly with an error string; on success it is scheduled through
 # Amanda::MainLoop::call_later as plan_cb->(undef, $self).
 sub make_holding_plan {
     my ($self, %params) = @_;

     foreach my $required ('holding_file', 'plan_cb') {
         next if exists $params{$required};
         croak "required parameter '$required' mising";
     }

     # Rather than trusting the path, read the file's header and look the dump
     # up in the catalog.  It is the long way around, but it means recovery is
     # never attempted from an arbitrary file that is not under a recognized
     # holding directory.
     my $plan_cb = $params{'plan_cb'};
     my $hdr = Amanda::Holding::get_header($params{'holding_file'});
     unless ($hdr and $hdr->{'type'} == $Amanda::Header::F_DUMPFILE) {
         return $plan_cb->(
                 "could not open '$params{holding_file}': missing or not a holding file");
     }

     # Query the catalog using the identity recorded in the header.  When a
     # dumpspec was supplied it goes first, so get_dumps filters on it too.
     my @query = (
         dump_timestamp => $hdr->{'datestamp'},
         hostname => $hdr->{'name'},
         diskname => $hdr->{'disk'},
         level => $hdr->{'dumplevel'},
         holding => 1,
     );
     if ($params{'dumpspec'}) {
         unshift @query, dumpspecs => [ $params{'dumpspec'} ];
     }
     my @dumps = Amanda::DB::Catalog::get_dumps(@query);

     unless (@dumps) {
         return $plan_cb->(
                 "Specified holding file does not match dumpspec");
     }

     # more than one match would be weird, so make a note of it..
     if (scalar(@dumps) > 1) {
         $self->dbg("got multiple dumps from Amanda::DB::Catalog for a holding file!");
     }

     # ..and arbitrarily keep the first dump if we got several
     $self->{'dumps'} = [ $dumps[0] ];

     Amanda::MainLoop::call_later($plan_cb, undef, $self);
 }
 389
 # Build a single-dump plan from an explicit list of volume files.
 #
 # Named parameters:
 #   filelist  - required; array ref of alternating label and array ref of
 #               file numbers: [ $label => [ $filenum, .. ], .. ]
 #   plan_cb   - required; result callback
 #   dumpspec  - optional; restricts the catalog search for matching parts
 #   dumpspecs - optional; passed to the final catalog re-query
 #   changer   - optional; if it has an inventory, the inventory is used to
 #               choose among several copies of the same dump ('chg' is also
 #               accepted, and the changer stored on the plan is the last
 #               fallback)
 #
 # Croaks if a required parameter is missing and confesses if the final
 # catalog re-query returns nothing.  Mismatches are reported by invoking
 # plan_cb with an error string; on success plan_cb->(undef, $self) is
 # scheduled through Amanda::MainLoop::call_later.
 sub make_plan_from_filelist {
     my $self = shift;
     my %params = @_;

     for my $rq_param (qw(filelist plan_cb)) {
         croak "required parameter '$rq_param' mising"
             unless exists $params{$rq_param};
     }

     # The documented parameter is 'changer', and the Plan object keeps it under
     # 'chg'.  Only looking at $params{'chg'} meant the inventory was never
     # consulted for callers using the documented interface.
     my $chg = $params{'chg'};
     $chg = $params{'changer'} unless defined $chg;
     $chg = $self->{'chg'} unless defined $chg;

     my $steps = define_steps
         cb_ref => \$params{'plan_cb'};

     step get_inventory => sub {
         if (defined $chg and $chg->have_inventory()) {
             return $chg->inventory( inventory_cb => $steps->{'got_inventory'});
         } else {
             return $steps->{'got_inventory'}->(undef, undef);
         }
     };
     step got_inventory => sub {
         my ($err, $inventory) = @_;

         # an inventory failure is not fatal: without an inventory we simply
         # fall back to the first matching dump below
         $self->dbg("could not get changer inventory: $err") if $err;

         # This is similarly tricky - in this case, we search for dumps matching
         # both the dumpspec and the labels, filter that down to just the parts we
         # want, and then check that only one dump remains.  Then we look up that
         # dump.

         my @labels;
         my %files;
         my @filelist = @{$params{'filelist'}};
         while (@filelist) {
             my $label = shift @filelist;
             push @labels, $label;
             $files{$label} = shift @filelist;
         }

         my @parts = Amanda::DB::Catalog::get_parts(
                 $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
                 labels => [ @labels ]);

         # filter down to the parts that match filelist (using %files)
         @parts = grep {
             my $filenum = $_->{'filenum'};
             grep { $_ == $filenum } @{$files{$_->{'label'}}};
         } @parts;

         # extract the dumps, using a hash (on the perl identity of the dump) to
         # ensure uniqueness
         my %dumps = map { my $d = $_->{'dump'}; ($d, $d) } @parts;
         my @dumps = values %dumps;

         if (!@dumps) {
             return $params{'plan_cb'}->(
                 "Specified file list does not match dumpspec");
         } elsif (@dumps > 1) {
             # Check if they are all for the same dump
             my $dump_timestamp = $dumps[0]->{'dump_timestamp'};
             my $hostname = $dumps[0]->{'hostname'};
             my $diskname = $dumps[0]->{'diskname'};
             my $level = $dumps[0]->{'level'};
             my $orig_kb = $dumps[0]->{'orig_kb'};

             foreach my $dump (@dumps) {
                 if ($dump_timestamp != $dump->{'dump_timestamp'} ||
                     $hostname ne $dump->{'hostname'} ||
                     $diskname ne $dump->{'diskname'} ||
                     $level != $dump->{'level'} ||
                     $orig_kb != $dump->{'orig_kb'}) {
                     return $params{'plan_cb'}->(
                         "Specified file list matches multiple dumps; cannot continue recovery");
                 }
             }

             # I would prefer the Planner to return alternate dump and the Clerk
             # choose which one to use
             if (defined $inventory) {
                 # labels currently reported by the changer
                 my %in_changer;
                 foreach my $sl (@$inventory) {
                     $in_changer{$sl->{'label'}} = 1 if defined $sl->{'label'};
                 }

                 # prefer the first copy whose parts are all on volumes in the
                 # changer; if there is none, the first dump is used
                 DUMP: for my $dump (@dumps) {
                     for my $part (@{$dump->{'parts'}}) {
                         next if !defined $part;
                         next DUMP
                             unless defined $part->{'label'}
                                and $in_changer{$part->{'label'}};
                     }
                     # select this copy (not blindly the first one)
                     @dumps = ($dump);
                     last DUMP;
                 }
             } else {
                 # no inventory available: the first dump will be used
             }
         }

         # now, because of the weak linking used by Amanda::DB::Catalog, we need to
         # re-query for this dump.  If we don't do this, the parts will all be
         # garbage-collected when we hand back the plan.  This is, charitably, "less
         # than ideal".  Note that this has the side-effect of filling in any parts of
         # the dump that were missing from the filelist.
         @dumps = Amanda::DB::Catalog::get_dumps(
             hostname => $dumps[0]->{'hostname'},
             diskname => $dumps[0]->{'diskname'},
             level => $dumps[0]->{'level'},
             dump_timestamp => $dumps[0]->{'dump_timestamp'},
             write_timestamp => $dumps[0]->{'write_timestamp'},
             dumpspecs => $params{'dumpspecs'});

         # sanity check
         confess "no dumps" unless @dumps;
         $self->{'dumps'} = [ $dumps[0] ];

         Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
     };
 }
 515
 # Turn each part of each dump into its own stand-alone dump.
 #
 # Takes an array ref of dumps and returns a list containing, for every
 # defined part, a shallow copy of its dump whose 'parts' list holds only a
 # shallow copy of that part (at index 1) and whose 'single_part' key is 1.
 # The input dumps and parts are not modified.
 sub split_dumps_per_part {
     my ($self, $dumps) = @_;

     my @per_part;

     foreach my $orig_dump (@$dumps) {
         foreach my $orig_part (@{ $orig_dump->{'parts'} }) {
             # index 0 of a parts list is an undefined placeholder
             next if !defined $orig_part;

             # shallow copies, with the dump's parts list and single_part
             # marker replaced on the way in
             my %part_copy = %$orig_part;
             my %dump_copy = (
                 %$orig_dump,
                 parts => [ undef, \%part_copy ],
                 single_part => 1,
             );

             # point the copied part back at its new, private dump
             $part_copy{'dump'} = \%dump_copy;

             push @per_part, \%dump_copy;
         }
     }

     return @per_part;
 }
 545
 # Return the volumes this plan needs, in the order its parts appear.
 #
 # Each entry is a hash ref { label => $label, available => 0 }.  Consecutive
 # parts on the same label produce one entry; a label that comes up again
 # after a different one is listed again.  Parts without a label (holding
 # parts) are ignored.
 sub get_volume_list {
     my ($self) = @_;

     my @volumes;
     my $current_label;

     foreach my $dump (@{ $self->{'dumps'} }) {
         foreach my $part (@{ $dump->{'parts'} }) {
             # skip the parts[0] placeholder and anything on holding disk
             next if !defined $part or !defined $part->{'label'};

             # still on the volume we recorded last
             next if defined $current_label
                 and $part->{'label'} eq $current_label;

             $current_label = $part->{'label'};
             push @volumes, { label => $current_label, available => 0 };
         }
     }

     return @volumes;
 }
 564
 # Return the 'holding_file' value of every part in the plan that has one,
 # in the order the parts appear.  On-media parts are ignored.
 sub get_holding_file_list {
     my ($self) = @_;

     my @paths;

     foreach my $dump (@{ $self->{'dumps'} }) {
         foreach my $part (@{ $dump->{'parts'} }) {
             # parts[0] is an undefined placeholder; on-media parts have no
             # holding_file
             if (defined $part and defined $part->{'holding_file'}) {
                 push @paths, $part->{'holding_file'};
             }
         }
     }

     return @paths;
 }
 579
 # Write $msg, prefixed with "Amanda::Recovery::Planner: ", through debug(),
 # but only when this plan's 'debug' field is true.
 sub dbg {
     my $self = shift;
     my ($text) = @_;

     # short-circuit: the message is neither built nor logged when debugging
     # is off
     $self->{'debug'} and debug("Amanda::Recovery::Planner: $text");
 }
 586
 587 1;