1 # Copyright (c) 2010-2012 Zmanda, Inc. All Rights Reserved.
3 # This library is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU Lesser General Public
5 # License as published by the Free Software Foundation; either
6 # version 2.1 of the License, or (at your option) any later version.
8 # This library is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11 # License for more details.
13 # You should have received a copy of the GNU Lesser General Public License
14 # along with this library; if not, write to the Free Software Foundation,
15 # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
17 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
18 # Sunnyvale, CA 94086, USA, or: http://www.zmanda.com
22 Amanda::Recovery::Planner - use the catalog to plan recoveries
28 $subs{'make_plan'} = make_cb(make_plan => sub {
29 Amanda::Recovery::Planner::make_plan(
30 dumpspecs => [ $ds1, $ds2 ],
33 plan_cb => $subs{'plan_cb'});
36 $subs{'plan_cb'} = make_cb(plan_cb => sub {
41 $subs{'start_next_dumpfile'}->();
44 $subs{'start_next_dumpfile'} = make_cb(start_next_dumpfile => sub {
45 my $dump = shift @{$plan->{'dumps'}};
50 print "recovering ", $dump->{'hostname'}, " ", $dump->{'diskname'}, "\n";
51 $clerk->get_xfer_src( .. dump => $dump .. );
57 This package determines the optimal way to recover dump files from storage.
58 Its function is superficially fairly simple: given a collection of desired
59 dumpfiles, it returns a Plan to recover those dumpfiles, specifying exactly the
60 volumes and files that are needed, and the order in which they should be processed.
65 Several algorithms will soon be available for selecting volumes when a dumpfile
66 appears in several places (e.g., from an amvault operation). At the moment,
67 the algorithm argument should be omitted, as this will eventually indicate that
68 the user-configured algorithm should be applied.
70 =head2 INSTANTIATING A PLAN
72 For most purposes, you should call C<make_plan> with the desired dumpspecs, a
73 changer, and a callback:
75 Amanda::Recovery::Planner::make_plan(
76 dumpspecs => [ $ds1, $ds2, .. ],
80 As a shortcut, you may also specify a single dumpspec:
82 Amanda::Recovery::Planner::make_plan(
87 Note that in this case, the resulting plan may contain more than one dump, if
88 the dumpspec was ambiguous.
90 To select the planner algorithm, pass an C<algorithm> argument. This argument
91 is currently ignored and should be omitted. If the optional argument C<debug>
92 is given with a true value, then the Planner will log additional debug
93 information to the Amanda debug logs. Debugging is automatically enabled if
94 the C<DEBUG_RECOVERY> configuration parameter is set to anything greater than zero.
97 The optional argument C<one_dump_per_part> will create a "no-reassembly" plan,
98 where each part appears as the only part in a unique dump. The dump objects
99 will have the key C<single_part> set to 1.
101 The C<plan_cb> is called with two arguments:
103 $plan_cb->($err, $plan);
105 If C<$err> is defined, it describes an error that occurred; otherwise, C<$plan>
106 is the generated plan, as described below.
108 Some algorithms may consult the changer's inventory to determine what volumes
109 are available. It is because of this asynchronous operation that C<make_plan>
110 takes a callback instead of simply returning the plan.
112 =head3 Pre-defined Plans
114 In some cases, you already know exactly where the data is, and just need a
115 proper plan object to hand to L<Amanda::Recovery::Clerk>. One such case is a
116 recovery from a holding file. In this case, use C<make_plan> like this:
118 Amanda::Recovery::Planner::make_plan(
121 plan_cb => $plan_cb);
123 This will create a plan to recover the data in C<$fh>. The dumpspec is
124 optional, but if present will be used to verify that the holding file contains
125 the appropriate dump.
127 Similarly, if you have a list of label:fileno pairs to use, call C<make_plan> like this:
130 Amanda::Recovery::Planner::make_plan(
132 $label => [ $filenum, $filenum, .. ],
136 plan_cb => $plan_cb);
138 This will verify the requested files against the catalog and the dumpspec, then
139 hand back a plan that essentially embodies C<filelist>.
141 Note that both of these functions will only create a single-dump plan.
145 A Plan is a perl object describing the process for recovering zero or more
146 dumpfiles. Its principal components are dumps, in order, that are to be
147 recovered, but the object presents some other interfaces that return useful
148 information about the plan.
150 The C<'dumps'> key holds the list of dumps, in the order they should be
151 performed. Callers should shift dumps off this list to present to the Clerk.
153 To get a list of volumes that the plan requires, in order, use
154 C<get_volume_list>. Each volume is represented as a hash:
156 { label => 'DATA182', available => 1 }
158 where C<available> is false if the planner did not find this volume in the
159 changer. Planners which do not consult the changer will have a false value for C<available>.
162 Similarly, to get a list of holding files that the plan requires, in order, use
163 C<get_holding_file_list>. Each file is represented as a string giving the
164 fully qualified pathname.
168 package Amanda::Recovery::Planner;
177 $params{'dumpspecs'} = [ $params{'dumpspec'} ]
178 if exists $params{'dumpspec'};
180 my $plan = Amanda::Recovery::Planner::Plan->new({
181 algo => $params{'algorithm'},
182 chg => $params{'changer'},
183 debug => $params{'debug'},
184 one_dump_per_part => $params{'one_dump_per_part'},
187 if (exists $params{'holding_file'}) {
188 $plan->make_holding_plan(%params);
189 } elsif (exists $params{'filelist'}) {
190 $plan->make_plan_from_filelist(%params);
192 $plan->make_plan(%params);
196 package Amanda::Recovery::Planner::Plan;
203 use Amanda::Device qw( :constants );
206 use Amanda::Config qw( :getconf config_dir_relative );
207 use Amanda::Debug qw( :logging );
208 use Amanda::MainLoop;
209 use Amanda::DB::Catalog;
210 use Amanda::Tapelist;
216 $self->{'debug'} = $Amanda::Config::debug_recovery
217 if not defined $self->{'debug'}
218 or $Amanda::Config::debug_recovery > $self->{'debug'};
220 return bless($self, $class);
225 return shift @{$self->{'dumps'}};
232 for my $rq_param (qw(changer plan_cb dumpspecs)) {
233 croak "required parameter '$rq_param' mising"
234 unless exists $params{$rq_param};
236 my $dumpspecs = $params{'dumpspecs'};
238 # first, get the set of dumps that match these dumpspecs
239 my @dumps = Amanda::DB::Catalog::get_dumps(dumpspecs => $dumpspecs);
241 # now "bin" those by host/disk/dump_ts/level
243 for my $dump (@dumps) {
244 my $k = join("\0", $dump->{'hostname'}, $dump->{'diskname'},
245 $dump->{'dump_timestamp'}, $dump->{'level'});
246 $dumps{$k} = [] unless exists $dumps{$k};
247 push @{$dumps{$k}}, $dump;
250 # now select the "best" of each set of dumps, and put that in @dumps
252 for my $options (values %dumps) {
253 my @options = @$options;
254 # if there's only one option, the choice is easy
256 push @dumps, $options[0];
260 # if there are several, narrow to those with an OK status or barring that,
261 # those with a PARTIAL status. FAIL need not apply.
262 my @ok_options = grep { $_->{'status'} eq 'OK' } @options;
263 my @partial_options = grep { $_->{'status'} eq 'PARTIAL' } @options;
266 @options = @ok_options;
268 @options = @partial_options;
271 # now, take the one written longest ago - this gets us the dump on secondary
272 # media if it hasn't been overwritten, otherwise the dump on tertiary media,
273 # etc. Note that this also prefers dumps on holding disk, since they are
274 # tagged with a write_timestamp of 0
275 @options = Amanda::DB::Catalog::sort_dumps(['write_timestamp'], @options);
276 push @dumps, $options[0];
279 # at this point we have exactly one instance of each dump in @dumps.
281 # If one_dump_per_part was specified, rearrange @dumps to have a distinct
282 # dump object for each part.
283 if ($self->{'one_dump_per_part'}) {
284 @dumps = $self->split_dumps_per_part(\@dumps);
287 # now sort the dumps in order by their constituent parts. This sorts based
288 # on write_timestamp, then on the label of the first part of the dump,
289 # using the tapelist to order the labels. Where labels match, it sorts on
290 # the part's filenum. This should sort the dumps into the order in which
291 # they were written, with holding dumps coming in at the head of the list.
292 my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
293 my $tapelist = Amanda::Tapelist->new($tapelist_filename);
300 if ($rv = $a->{'write_timestamp'} cmp $b->{'write_timestamp'});
302 # above will take care of comparing a holding dump to an on-media dump, but
303 # if both are on holding then we need to compare them lexically
304 if (exists $a->{'parts'}[1]{'holding_file'}
305 and exists $b->{'parts'}[1]{'holding_file'}) {
306 return $a->{'parts'}[1]{'holding_file'} cmp $b->{'parts'}[1]{'holding_file'};
309 my ($alabel, $blabel) = (
310 $a->{'parts'}[1]{'label'},
311 $b->{'parts'}[1]{'label'},
315 $apos = $tle->{'position'}
316 if (($tle = $tapelist->lookup_tapelabel($alabel)));
317 $bpos = $tle->{'position'}
318 if (($tle = $tapelist->lookup_tapelabel($blabel)));
319 return ($bpos <=> $apos) # not: reversed for "oldest to newest"
320 if defined $bpos && defined $apos && ($bpos <=> $apos);
322 # if a tape wasn't in the tapelist, just sort the labels lexically (this
323 # really shouldn't happen)
324 if (!defined $bpos || !defined $apos) {
325 return $alabel cmp $blabel
326 if defined $alabel and defined $blabel and $alabel cmp $blabel ;
329 # finally, the dumps are on the same volume, so just sort by filenum
330 return $a->{'parts'}[1]{'filenum'} <=> $b->{'parts'}[1]{'filenum'};
332 @dumps = sort $sortfn @dumps;
334 $self->{'dumps'} = \@dumps;
336 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# make_holding_plan: build a single-dump plan for recovering from a holding
# file.
#
# Parameters (named, via %params):
#   holding_file - path of the holding file to recover from (required)
#   plan_cb      - callback that receives the result (required)
#   dumpspec     - optional; when true it is passed to the catalog lookup so
#                  that the lookup also checks the dump against it
#
# Croaks if a required parameter is absent.  The error paths visible here
# report failure by invoking plan_cb directly with a message string; on
# success the plan is stored in $self->{'dumps'} and plan_cb is scheduled
# through Amanda::MainLoop::call_later with (undef, $self).
339 sub make_holding_plan {
# NOTE(review): "mising" in the croak message below is a typo for "missing";
# it is left untouched here because the string is emitted at runtime.
343 for my $rq_param (qw(holding_file plan_cb)) {
344 croak "required parameter '$rq_param' mising"
345 unless exists $params{$rq_param};
348 # This is a little tricky. The idea is to open up the holding file and
349 # read its header, then find that dump in the catalog. This may seem like
350 # the long way around, but it adds an extra layer of security to the
351 # recovery process, as it prevents recovery from arbitrary files on the
352 # filesystem that are not under a recognized holding directory.
354 my $hdr = Amanda::Holding::get_header($params{'holding_file'});
# reject a missing header, or one whose type is not F_DUMPFILE
355 if (!$hdr or $hdr->{'type'} != $Amanda::Header::F_DUMPFILE) {
356 return $params{'plan_cb'}->(
357 "could not open '$params{holding_file}': missing or not a holding file");
360 # look up this holding file in the catalog, adding the dumpspec we were
361 # given so that get_dumps will compare against it for us.
362 my $dump_timestamp = $hdr->{'datestamp'};
363 my $hostname = $hdr->{'name'};
364 my $diskname = $hdr->{'disk'};
365 my $level = $hdr->{'dumplevel'};
366 my @dumps = Amanda::DB::Catalog::get_dumps(
367 $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
368 dump_timestamp => $dump_timestamp,
369 hostname => $hostname,
370 diskname => $diskname,
# presumably reached when the catalog lookup returned no dumps; the guarding
# condition is not visible in this view -- confirm
376 return $params{'plan_cb'}->(
377 "Specified holding file does not match dumpspec");
380 # this would be weird..
381 $self->dbg("got multiple dumps from Amanda::DB::Catalog for a holding file!")
384 # arbitrarily keep the first dump if we got several
385 $self->{'dumps'} = [ $dumps[0] ];
# hand the finished plan back asynchronously
387 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# make_plan_from_filelist: build a single-dump plan from an explicit list of
# label => [ filenum, .. ] pairs.
#
# Parameters (named, via %params):
#   filelist  - flat list alternating a label and an arrayref of file
#               numbers (required)
#   plan_cb   - callback that receives the result (required)
#   dumpspec  - optional; when true it is passed to the catalog part lookup
#   dumpspecs - passed through to the final catalog re-query
#   chg       - optional changer; its inventory is requested when it reports
#               having one
#
# Croaks if a required parameter is absent.  The work is split into two steps
# (get_inventory, got_inventory) declared with define_steps.  The error paths
# visible here call plan_cb directly with a message string; on success the
# selected dump is stored in $self->{'dumps'} and plan_cb is scheduled through
# Amanda::MainLoop::call_later with (undef, $self).
390 sub make_plan_from_filelist {
# NOTE(review): "mising" in the croak message below is a typo for "missing";
# it is left untouched here because the string is emitted at runtime.
394 for my $rq_param (qw(filelist plan_cb)) {
395 croak "required parameter '$rq_param' mising"
396 unless exists $params{$rq_param};
399 my $steps = define_steps
400 cb_ref => \$params{'plan_cb'};
# NOTE(review): this step reads the changer from $params{'chg'}, while
# Amanda::Recovery::Planner::make_plan reads the caller's changer from the
# 'changer' key and forwards %params unchanged -- confirm that callers of the
# filelist form really pass 'chg', otherwise the inventory is never consulted.
402 step get_inventory => sub {
403 if (defined $params{'chg'} and $params{'chg'}->have_inventory()) {
404 return $params{'chg'}->inventory( inventory_cb => $steps->{'got_inventory'});
# no usable changer inventory: continue with an undefined inventory
406 return $steps->{'got_inventory'}->(undef, undef);
409 step got_inventory => sub {
410 my ($err, $inventory) = @_;
412 # This is similarly tricky - in this case, we search for dumps matching
413 # both the dumpspec and the labels, filter that down to just the parts we
414 # want, and then check that only one dump remains. Then we look up that dump.
419 my @filelist = @{$params{'filelist'}};
# consume the filelist two items at a time: a label, then its file numbers
421 my $label = shift @filelist;
422 push @labels, $label;
423 $files{$label} = shift @filelist;
426 my @parts = Amanda::DB::Catalog::get_parts(
427 $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
428 labels => [ @labels ]);
430 # filter down to the parts that match filelist (using %files)
432 my $filenum = $_->{'filenum'};
433 grep { $_ == $filenum } @{$files{$_->{'label'}}};
436 # extract the dumps, using a hash (on the perl identity of the dump) to deduplicate them
438 my %dumps = map { my $d = $_->{'dump'}; ($d, $d) } @parts;
439 my @dumps = values %dumps;
# presumably reached when no dump matched; the guarding condition is not
# visible in this view -- confirm
442 return $params{'plan_cb'}->(
443 "Specified file list does not match dumpspec");
444 } elsif (@dumps > 1) {
445 # Check if they are all for the same dump
446 my $dump_timestamp = $dumps[0]->{'dump_timestamp'};
447 my $hostname = $dumps[0]->{'hostname'};
448 my $diskname = $dumps[0]->{'diskname'};
449 my $level = $dumps[0]->{'level'};
450 my $orig_kb = $dumps[0]->{'orig_kb'};
# any difference in timestamp, host, disk, level or original size means the
# file list spans more than one logical dump
452 foreach my $dump (@dumps) {
453 if ($dump_timestamp != $dump->{'dump_timestamp'} ||
454 $hostname ne $dump->{'hostname'} ||
455 $diskname ne $dump->{'diskname'} ||
456 $level != $dump->{'level'} ||
457 $orig_kb != $dump->{'orig_kb'}) {
458 return $params{'plan_cb'}->(
459 "Specified file list matches multiple dumps; cannot continue recovery");
463 # I would prefer the Planner to return the alternate dumps and the Clerk
464 # to choose which one to use
465 if (defined $inventory) {
466 for my $dump (@dumps) {
467 my $all_part_found = 0;
469 for my $part (@{$dump->{'parts'}}) {
470 next if !defined $part;
# look for an inventory entry whose label matches the label of this part
472 foreach my $sl (@$inventory) {
473 if (defined $sl->{'label'} and
474 $sl->{'label'} eq $part->{'label'}) {
484 if ($part_found == 1) {
489 # the first one will be used
491 # will use the first dump.
495 # now, because of the weak linking used by Amanda::DB::Catalog, we need to
496 # re-query for this dump. If we don't do this, the parts will all be
497 # garbage-collected when we hand back the plan. This is, charitably, "less
498 # than ideal". Note that this has the side-effect of filling in any parts of
499 # the dump that were missing from the filelist.
500 @dumps = Amanda::DB::Catalog::get_dumps(
501 hostname => $dumps[0]->{'hostname'},
502 diskname => $dumps[0]->{'diskname'},
503 level => $dumps[0]->{'level'},
504 dump_timestamp => $dumps[0]->{'dump_timestamp'},
505 write_timestamp => $dumps[0]->{'write_timestamp'},
506 dumpspecs => $params{'dumpspecs'});
# the re-query must find the dump again; anything else is an internal error
509 confess "no dumps" unless @dumps;
510 $self->{'dumps'} = [ $dumps[0] ];
# hand the finished plan back asynchronously
512 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# split_dumps_per_part: turn each defined part of each input dump into its
# own dump object, so that no reassembly of parts is needed.
#
# Iterates over the dumps in the arrayref $dumps.  For every defined part it
# shallow-copies both the dump and the part, links the copies to each other,
# marks the new dump with single_part => 1 and collects it in @new_dumps.
# The original dump and part hashes are not modified by the visible code.
# (The argument unpacking and the return statement are not visible in this
# view; presumably @new_dumps is returned -- confirm.)
516 sub split_dumps_per_part {
522 for my $dump (@$dumps) {
523 for my $part (@{$dump->{'parts'}}) {
524 my ($newdump, $newpart);
# undefined slots in the parts array are skipped
527 next unless defined $part;
529 # shallow copy the dump and part objects
530 $newdump = do { my %t = %$dump; \%t; };
531 $newpart = do { my %t = %$part; \%t; };
533 # overwrite the interlinking
534 $newpart->{'dump'} = $newdump;
# the new dump holds exactly one part, at index 1; index 0 stays undef
535 $newdump->{'parts'} = [ undef, $newpart ];
537 $newdump->{'single_part'} = 1;
539 push @new_dumps, $newdump;
# get_volume_list: collect the volumes needed by the plan, in plan order.
#
# Walks every part of every dump in $self->{'dumps'} and pushes a hash
# { label => .., available => 0 } onto @volumes each time the label differs
# from the previously seen one.  Only consecutive repeats are collapsed, so a
# label that recurs later in the plan is listed again.  The visible code
# always sets available to 0.
# (The return statement is not visible in this view; presumably @volumes is
# returned -- confirm.)
546 sub get_volume_list {
551 for my $dump (@{$self->{'dumps'}}) {
552 for my $part (@{$dump->{'parts'}}) {
553 next unless defined $part; # skip parts[0]
554 next unless defined $part->{'label'}; # skip holding parts
# start a new volume entry whenever the label changes
555 if (!defined $last_label || $part->{'label'} ne $last_label) {
556 $last_label = $part->{'label'};
557 push @volumes, { label => $last_label, available => 0 };
# get_holding_file_list: collect the holding files needed by the plan, in
# plan order.
#
# Walks every part of every dump in $self->{'dumps'} and pushes the
# holding_file value of each part that defines one onto @hfiles.  No
# de-duplication is done in the visible code.
# (The return statement is not visible in this view; presumably @hfiles is
# returned -- confirm.)
565 sub get_holding_file_list {
569 for my $dump (@{$self->{'dumps'}}) {
570 for my $part (@{$dump->{'parts'}}) {
571 next unless defined $part; # skip parts[0]
572 next unless defined $part->{'holding_file'}; # skip on-media dumps
573 push @hfiles, $part->{'holding_file'};
581 my ($self, $msg) = @_;
582 if ($self->{'debug'}) {
583 debug("Amanda::Recovery::Planner: $msg");