git.gag.com Git - debian/amanda/blob - perl/Amanda/Recovery/Planner.pm

   1 # Copyright (c) 2010 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This library is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU Lesser General Public License version 2.1 as
   5 # published by the Free Software Foundation.
   6 #
   7 # This library is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  10 # License for more details.
  11 #
  12 # You should have received a copy of the GNU Lesser General Public License
  13 # along with this library; if not, write to the Free Software Foundation,
  14 # Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA.
  15 #
  16 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  17 # Sunnyvale, CA 94086, USA, or: http://www.zmanda.com
  18
  19 =head1 NAME
  20
  21 Amanda::Recovery::Planner - use the catalog to plan recoveries
  22
  23 =head1 SYNOPSIS
  24
  25     my $plan;
  26
  27     $subs{'make_plan'} = make_cb(make_plan => sub {
  28         Amanda::Recovery::Planner::make_plan(
  29             dumpspecs => [ $ds1, $ds2 ],
  30             algorithm => $algo,
  31             changer => $changer,
  32             plan_cb => $subs{'plan_cb'});
   33     });
  34
  35     $subs{'plan_cb'} = make_cb(plan_cb => sub {
  36         my ($err, $pl) = @_;
  37         die $err if $err;
  38
  39         $plan = $pl;
  40         $subs{'start_next_dumpfile'}->();
  41     });
  42
  43     $subs{'start_next_dumpfile'} = make_cb(start_next_dumpfile => sub {
  44         my $dump = $plan->shift_dump();
  45         if (!$dump) {
  46             # .. all done!
  47         }
  48
  49         print "recovering ", $dump->{'hostname'}, " ", $dump->{'diskname'}, "\n";
  50         $clerk->get_xfer_src( .. dump => $dump .. );
  51         # ..
  52     });
  53
  54 =head1 OVERVIEW
  55
  56 This package determines the optimal way to recover dump files from storage.
  57 Its function is superficially fairly simple: given a collection of desired
  58 dumpfiles, it returns a Plan to recover those dumpfiles, specifying exactly the
  59 volumes and files that are needed, and the order in which they should be
   60 accessed.
  61
  62 =head2 ALGORITHMS
  63
  64 Several algorithms will soon be available for selecting volumes when a dumpfile
  65 appears in several places (e.g., from an amvault operation).  At the moment,
  66 the algorithm argument should be omitted, as this will eventually indicate that
  67 the user-configured algorithm should be applied.
  68
  69 =head2 INSTANTIATING A PLAN
  70
  71 For most purposes, you should call C<make_plan> with the desired dumpspecs, a
  72 changer, and a callback:
  73
  74     Amanda::Recovery::Planner::make_plan(
  75         dumpspecs => [ $ds1, $ds2, .. ],
  76         changer => $chg,
  77         plan_cb => $plan_cb);
  78
  79 As a shortcut, you may also specify a single dumpspec:
  80
  81     Amanda::Recovery::Planner::make_plan(
  82         dumpspec => $ds,
  83         changer => $chg,
  84         plan_cb => $plan_cb);
  85
  86 Note that in this case, the resulting plan may contain more than one dump, if
  87 the dumpspec was not unambiguous.
  88
  89 To select the planner algorithm, pass an C<algorithm> argument.  This argument
  90 is currently ignored and should be omitted.  If the optional argument C<debug>
  91 is given with a true value, then the Planner will log additional debug
  92 information to the Amanda debug logs.  Debugging is automatically enabled if
  93 the C<DEBUG_RECOVERY> configuration parameter is set to anything greater than
  94 1.
  95
  96 The optional argument C<one_dump_per_part> will create a "no-reassembly" plan,
  97 where each part appears as the only part in a unique dump.  The dump objects
  98 will have the key C<single_part> set to 1.
  99
 100 The C<plan_cb> is called with two arguments:
 101
 102     $plan_cb->($err, $plan);
 103
 104 If C<$err> is defined, it describes an error that occurred; otherwise, C<$plan>
 105 is the generated plan, as described below.
 106
 107 Some algorithms may consult the changer's inventory to determine what volumes
 108 are available.  It is because of this asynchronous operation that C<make_plan>
 109 takes a callback instead of simply returning the plan.
 110
 111 =head3 Pre-defined Plans
 112
 113 In some cases, you already know exactly where the data is, and just need a
 114 proper plan object to hand to L<Amanda::Recovery::Clerk>.  One such case is a
 115 recovery from a holding file.  In this case, use C<make_plan> like this:
 116
 117     Amanda::Recovery::Planner::make_plan(
 118         holding_file => $hf,
 119         dumpspec => $ds,
 120         plan_cb => $plan_cb);
 121
  122 This will create a plan to recover the data in C<$hf>.  The dumpspec is
 123 optional, but if present will be used to verify that the holding file contains
 124 the appropriate dump.
 125
 126 Similarly, if you have a list of label:fileno pairs to use, call C<make_plan>
 127 like this:
 128
 129     Amanda::Recovery::Planner::make_plan(
 130         filelist => [
 131             $label => [ $filenum, $filenum, .. ],
 132             $label => ..
 133         ],
 134         dumpspec => $ds,
 135         plan_cb => $plan_cb);
 136
 137 This will verify the requested files against the catalog and the dumpspec, then
 138 hand back a plan that essentially embodies C<filelist>.
 139
 140 Note that both of these functions will only create a single-dump plan.
 141
 142 =head2 PLANS
 143
 144 A Plan is a perl object describing the process for recovering zero or more
 145 dumpfiles.  Its principal components are dumps, in order, that are to be
 146 recovered, but the object presents some other interfaces that return useful
 147 information about the plan.
 148
 149 The C<'dumps'> key holds the list of dumps, in the order they should be
 150 performed.  Callers should shift dumps off this list to present to the Clerk.
 151
 152 To get a list of volumes that the plan requires, in order, use
 153 C<get_volume_list>.  Each volume is represented as a hash:
 154
 155   { label => 'DATA182', available => 1 }
 156
 157 where C<available> is false if the planner did not find this volume in the
 158 changer.  Planners which do not consult the changer will have a false value for
 159 C<available>.
 160
 161 Similarly, to get a list of holding files that the plan requires, in order, use
 162 C<get_holding_file_list>.  Each file is represented as a string giving the
 163 fully qualified pathname.
 164
 165 =cut
 166
 167 package Amanda::Recovery::Planner;
 168
 169 use strict;
 170 use warnings;
 171 use Carp;
 172
 173 sub make_plan {
 174     my %params = @_;
 175
 176     $params{'dumpspecs'} = [ $params{'dumpspec'} ]
 177         if exists $params{'dumpspec'};
 178
 179     my $plan = Amanda::Recovery::Planner::Plan->new({
 180         algo => $params{'algorithm'},
 181         chg => $params{'changer'},
 182         debug => $params{'debug'},
 183         one_dump_per_part => $params{'one_dump_per_part'},
 184     });
 185
 186     if ($params{'holding_file'}) {
 187         $plan->make_holding_plan(%params);
 188     } elsif ($params{'filelist'}) {
 189         $plan->make_plan_from_filelist(%params);
 190     } else {
 191         $plan->make_plan(%params);
 192     }
 193 }
 194
 195 package Amanda::Recovery::Planner::Plan;
 196
 197 use strict;
 198 use warnings;
 199 use Data::Dumper;
 200 use Carp;
 201
 202 use Amanda::Device qw( :constants );
 203 use Amanda::Holding;
 204 use Amanda::Header;
 205 use Amanda::Config qw( :getconf config_dir_relative );
 206 use Amanda::Debug qw( :logging );
 207 use Amanda::MainLoop;
 208 use Amanda::DB::Catalog;
 209 use Amanda::Tapelist;
 210
 211 sub new {
 212     my $class = shift;
 213     my $self = shift;
 214
 215     $self->{'debug'} = $Amanda::Config::debug_recovery
 216         if not defined $self->{'debug'}
 217             or $Amanda::Config::debug_recovery > $self->{'debug'};
 218
 219     return bless($self, $class);
 220 }
 221
 222 sub make_plan {
 223     my $self = shift;
 224     my %params = @_;
 225
 226     for my $rq_param qw(changer plan_cb dumpspecs) {
 227         croak "required parameter '$rq_param' mising"
 228             unless exists $params{$rq_param};
 229     }
 230     my $dumpspecs = $params{'dumpspecs'};
 231
 232     # first, get the set of dumps that match these dumpspecs
 233     my @dumps = Amanda::DB::Catalog::get_dumps(dumpspecs => $dumpspecs);
 234
 235     # now "bin" those by host/disk/dump_ts/level
 236     my %dumps;
 237     for my $dump (@dumps) {
 238         my $k = join("\0", $dump->{'hostname'}, $dump->{'diskname'},
 239                            $dump->{'dump_timestamp'}, $dump->{'level'});
 240         $dumps{$k} = [] unless exists $dumps{$k};
 241         push @{$dumps{$k}}, $dump;
 242     }
 243
 244     # now select the "best" of each set of dumps, and put that in @dumps
 245     @dumps = ();
 246     for my $options (values %dumps) {
 247         my @options = @$options;
 248         # if there's only one option, the choice is easy
 249         if (@options == 1) {
 250             push @dumps, $options[0];
 251             next;
 252         }
 253
 254         # if there are several, narrow to those with an OK status or barring that,
 255         # those with a PARTIAL status.  FAIL need not apply.
 256         my @ok_options = grep { $_->{'status'} eq 'OK' } @options;
 257         my @partial_options = grep { $_->{'status'} eq 'PARTIAL' } @options;
 258
 259         if (@ok_options) {
 260             @options = @ok_options;
 261         } else {
 262             @options = @partial_options;
 263         }
 264
 265         # now, take the one written longest ago - this gets us the dump on secondary
 266         # media if it hasn't been overwritten, otherwise the dump on tertiary media,
 267         # etc.  Note that this also prefers dumps on holding disk, since they are
 268         # tagged with a write_timestamp of 0
 269         @options = Amanda::DB::Catalog::sort_dumps(['write_timestamp'], @options);
 270         push @dumps, $options[0];
 271     }
 272
 273     # at this point we have exactly one instance of each dump in @dumps.
 274
 275     # If one_dump_per_part was specified, rearrange @dumps to have a distinct
 276     # dump object for each part.
 277     if ($self->{'one_dump_per_part'}) {
 278         @dumps = $self->split_dumps_per_part(\@dumps);
 279     }
 280
 281     # now sort the dumps in order by their constituent parts.  This sorts based
 282     # on write_timestamp, then on the label of the first part of the dump,
 283     # using the tapelist to order the labels.  Where labels match, it sorts on
 284     # the part's filenum.  This should sort the dumps into the order in which
 285     # they were written, with holding dumps coming in at the head of the list.
 286     my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
 287     my $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
 288
 289     my $sortfn = sub {
 290         my $rv;
 291         my $tle;
 292
 293         return $rv
 294             if ($rv = $a->{'write_timestamp'} cmp $b->{'write_timestamp'});
 295
 296         # above will take care of comparing a holding dump to an on-media dump, but
 297         # if both are on holding then we need to compare them lexically
 298         if (exists $a->{'parts'}[1]{'holding_file'}
 299         and exists $b->{'parts'}[1]{'holding_file'}) {
 300             return $a->{'parts'}[1]{'holding_file'} cmp $b->{'parts'}[1]{'holding_file'};
 301         }
 302
 303         my ($alabel, $blabel) = (
 304             $a->{'parts'}[1]{'label'},
 305             $b->{'parts'}[1]{'label'},
 306         );
 307
 308         my ($apos, $bpos);
 309         $apos = $tle->{'position'}
 310             if (($tle = $tapelist->lookup_tapelabel($alabel)));
 311         $bpos = $tle->{'position'}
 312             if (($tle = $tapelist->lookup_tapelabel($blabel)));
 313         return ($bpos <=> $apos) # not: reversed for "oldest to newest"
 314             if defined $bpos && defined $apos && ($bpos <=> $apos);
 315
 316         # if a tape wasn't in the tapelist, just sort the labels lexically (this
 317         # really shouldn't happen)
 318         if (!defined $bpos || !defined $apos) {
 319             return $alabel cmp $blabel
 320                 if defined $alabel and defined $blabel and $alabel cmp $blabel ;
 321         }
 322
 323         # finally, the dumps are on the same volume, so just sort by filenum
 324         return $a->{'parts'}[1]{'filenum'} <=> $b->{'parts'}[1]{'filenum'};
 325     };
 326     @dumps = sort $sortfn @dumps;
 327
 328     $self->{'dumps'} = \@dumps;
 329
 330     Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
 331 }
 332
 333 sub make_holding_plan {
 334     my $self = shift;
 335     my %params = @_;
 336
 337     for my $rq_param qw(holding_file plan_cb) {
 338         croak "required parameter '$rq_param' mising"
 339             unless exists $params{$rq_param};
 340     }
 341
 342     # This is a little tricky.  The idea is to open up the holding file and
 343     # read its header, then find that dump in the catalog.  This may seem like
 344     # the long way around, but it adds an extra layer of security to the
 345     # recovery process, as it prevents recovery from arbitrary files on the
 346     # filesystem that are not under a recognized holding directory.
 347
 348     my $hdr = Amanda::Holding::get_header($params{'holding_file'});
 349     if (!$hdr or $hdr->{'type'} != $Amanda::Header::F_DUMPFILE) {
 350         return $params{'plan_cb'}->(
 351                 "could not open '$params{holding_file}': missing or not a holding file");
 352     }
 353
 354     # look up this holding file in the catalog, adding the dumpspec we were
 355     # given so that get_dumps will compare against it for us.
 356     my $dump_timestamp = $hdr->{'datestamp'};
 357     my $hostname = $hdr->{'name'};
 358     my $diskname = $hdr->{'disk'};
 359     my $level = $hdr->{'dumplevel'};
 360     my @dumps = Amanda::DB::Catalog::get_dumps(
 361             $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
 362             dump_timestamp => $dump_timestamp,
 363             hostname => $hostname,
 364             diskname => $diskname,
 365             level => $level,
 366             holding => 1,
 367         );
 368
 369     if (!@dumps) {
 370         return $params{'plan_cb'}->(
 371                 "Specified holding file does not match dumpspec");
 372     }
 373
 374     # this would be weird..
 375     $self->dbg("got multiple dumps from Amanda::DB::Catalog for a holding file!")
 376         if (@dumps > 1);
 377
 378     # arbitrarily keepy the first dump if we got several
 379     $self->{'dumps'} = [ $dumps[0] ];
 380
 381     Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
 382 }
 383
 384 sub make_plan_from_filelist {
 385     my $self = shift;
 386     my %params = @_;
 387
 388     for my $rq_param qw(filelist plan_cb) {
 389         croak "required parameter '$rq_param' mising"
 390             unless exists $params{$rq_param};
 391     }
 392
 393     # This is similarly tricky - in this case, we search for dumps matching
 394     # both the dumpspec and the labels, filter that down to just the parts we
 395     # want, and then check that only one dump remains.  Then we look up that
 396     # dump.
 397
 398     my @labels;
 399     my %files;
 400     my @filelist = @{$params{'filelist'}};
 401     while (@filelist) {
 402         my $label = shift @filelist;
 403         push @labels, $label;
 404         $files{$label} = shift @filelist;
 405     }
 406
 407     my @parts = Amanda::DB::Catalog::get_parts(
 408             $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
 409             labels => [ @labels ]);
 410
 411     # filter down to the parts that match filelist (using %files)
 412     my $in_filelist = sub {
 413         my $filenum = $_->{'filenum'};
 414         grep { $_ == $filenum } @{$files{$_->{'label'}}};
 415     };
 416     @parts = grep $in_filelist, @parts;
 417
 418     # extract the dumps, using a hash to ensure uniqueness
 419     my %dumps = map { my $d = $_->{'dump'}; ($d, $d) } @parts;
 420     my @dumps = values %dumps;
 421
 422     if (!@dumps) {
 423         return $params{'plan_cb'}->(
 424                 "Specified file list does not match dumpspec");
 425     } elsif (@dumps > 1) {
 426         return $params{'plan_cb'}->(
 427                 "Specified file list matches multiple dumps; cannot continue recovery");
 428     }
 429
 430     # now, because of the weak linking used by Amanda::DB::Catalog, we need to
 431     # re-query for this dump.  If we don't do this, the parts will all be
 432     # garbage-collected when we hand back the plan.  This is, chartiably, "less than
 433     # ideal".  Note that this has the side-effect of filling in any parts of the
 434     # dump that were missing from the filelist.
 435     @dumps = Amanda::DB::Catalog::get_dumps(
 436         hostname => $dumps[0]->{'hostname'},
 437         diskname => $dumps[0]->{'diskname'},
 438         level => $dumps[0]->{'level'},
 439         dump_timestamp => $dumps[0]->{'dump_timestamp'},
 440         write_timestamp => $dumps[0]->{'write_timestamp'},
 441         dumpspecs => $params{'dumpspecs'});
 442
 443     # sanity check
 444     die unless @dumps;
 445     $self->{'dumps'} = [ $dumps[0] ];
 446
 447     Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
 448 }
 449
 450 sub split_dumps_per_part {
 451     my $self = shift;
 452     my ($dumps) = @_;
 453
 454     my @new_dumps;
 455
 456     for my $dump (@$dumps) {
 457         for my $part (@{$dump->{'parts'}}) {
 458             my ($newdump, $newpart);
 459
 460             # skip part 0
 461             next unless defined $part;
 462
 463             # shallow copy the dump and part objects
 464             $newdump = do { my %t = %$dump; \%t; };
 465             $newpart = do { my %t = %$part; \%t; };
 466
 467             # overwrite the interlinking
 468             $newpart->{'dump'} = $newdump;
 469             $newdump->{'parts'} = [ undef, $newpart ];
 470
 471             $newdump->{'single_part'} = 1;
 472
 473             push @new_dumps, $newdump;
 474         }
 475     }
 476
 477     return @new_dumps;
 478 }
 479
 480 sub get_volume_list {
 481     my $self = shift;
 482     my $last_label;
 483     my @volumes;
 484
 485     for my $dump (@{$self->{'dumps'}}) {
 486         for my $part (@{$dump->{'parts'}}) {
 487             next unless defined $part; # skip parts[0]
 488             next unless defined $part->{'label'}; # skip holding parts
 489             if (!defined $last_label || $part->{'label'} ne $last_label) {
 490                 $last_label = $part->{'label'};
 491                 push @volumes, { label => $last_label, available => 0 };
 492             }
 493         }
 494     }
 495
 496     return @volumes;
 497 }
 498
 499 sub get_holding_file_list {
 500     my $self = shift;
 501     my @hfiles;
 502
 503     for my $dump (@{$self->{'dumps'}}) {
 504         for my $part (@{$dump->{'parts'}}) {
 505             next unless defined $part; # skip parts[0]
 506             next unless defined $part->{'holding_file'}; # skip on-media dumps
 507             push @hfiles, $part->{'holding_file'};
 508         }
 509     }
 510
 511     return @hfiles;
 512 }
 513
 514 sub dbg {
 515     my ($self, $msg) = @_;
 516     if ($self->{'debug'}) {
 517         debug("Amanda::Recovery::Planner: $msg");
 518     }
 519 }
 520
 521 1;