1 # Copyright (c) 2010 Zmanda, Inc. All Rights Reserved.
3 # This library is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU Lesser General Public License version 2.1 as
5 # published by the Free Software Foundation.
7 # This library is distributed in the hope that it will be useful, but
8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
9 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 # License for more details.
12 # You should have received a copy of the GNU Lesser General Public License
13 # along with this library; if not, write to the Free Software Foundation,
14 # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
16 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
17 # Sunnyvale, CA 94086, USA, or: http://www.zmanda.com
21 Amanda::Recovery::Planner - use the catalog to plan recoveries
27 $subs{'make_plan'} = make_cb(make_plan => sub {
28 Amanda::Recovery::Planner::make_plan(
29 dumpspecs => [ $ds1, $ds2 ],
32 plan_cb => $subs{'plan_cb'});
35 $subs{'plan_cb'} = make_cb(plan_cb => sub {
40 $subs{'start_next_dumpfile'}->();
43 $subs{'start_next_dumpfile'} = make_cb(start_next_dumpfile => sub {
44 my $dump = $plan->shift_dump();
49 print "recovering ", $dump->{'hostname'}, " ", $dump->{'diskname'}, "\n";
50 $clerk->get_xfer_src( .. dump => $dump .. );
56 This package determines the optimal way to recover dump files from storage.
57 Its function is superficially fairly simple: given a collection of desired
58 dumpfiles, it returns a Plan to recover those dumpfiles, specifying exactly the
59 volumes and files that are needed, and the order in which they should be
64 Several algorithms will soon be available for selecting volumes when a dumpfile
65 appears in several places (e.g., from an amvault operation). At the moment,
66 the algorithm argument should be omitted; omitting it will eventually indicate
67 that the user-configured algorithm should be applied.
69 =head2 INSTANTIATING A PLAN
71 For most purposes, you should call C<make_plan> with the desired dumpspecs, a
72 changer, and a callback:
74 Amanda::Recovery::Planner::make_plan(
75 dumpspecs => [ $ds1, $ds2, .. ],
79 As a shortcut, you may also specify a single dumpspec:
81 Amanda::Recovery::Planner::make_plan(
86 Note that in this case, the resulting plan may contain more than one dump, if
87 the dumpspec was ambiguous.
89 To select the planner algorithm, pass an C<algorithm> argument. This argument
90 is currently ignored and should be omitted. If the optional argument C<debug>
91 is given with a true value, then the Planner will log additional debug
92 information to the Amanda debug logs. Debugging is automatically enabled if
93 the C<DEBUG_RECOVERY> configuration parameter is set to anything greater than
96 The optional argument C<one_dump_per_part> will create a "no-reassembly" plan,
97 where each part appears as the only part in a unique dump. The dump objects
98 will have the key C<single_part> set to 1.
100 The C<plan_cb> is called with two arguments:
102 $plan_cb->($err, $plan);
104 If C<$err> is defined, it describes an error that occurred; otherwise, C<$plan>
105 is the generated plan, as described below.
107 Some algorithms may consult the changer's inventory to determine what volumes
108 are available. It is because of this asynchronous operation that C<make_plan>
109 takes a callback instead of simply returning the plan.
111 =head3 Pre-defined Plans
113 In some cases, you already know exactly where the data is, and just need a
114 proper plan object to hand to L<Amanda::Recovery::Clerk>. One such case is a
115 recovery from a holding file. In this case, use C<make_plan> like this:
117 Amanda::Recovery::Planner::make_plan(
120 plan_cb => $plan_cb);
122 This will create a plan to recover the data in C<$fh>. The dumpspec is
123 optional, but if present will be used to verify that the holding file contains
124 the appropriate dump.
126 Similarly, if you have a list of label:fileno pairs to use, call C<make_plan>
129 Amanda::Recovery::Planner::make_plan(
131 $label => [ $filenum, $filenum, .. ],
135 plan_cb => $plan_cb);
137 This will verify the requested files against the catalog and the dumpspec, then
138 hand back a plan that essentially embodies C<filelist>.
140 Note that both of these functions will only create a single-dump plan.
144 A Plan is a perl object describing the process for recovering zero or more
145 dumpfiles. Its principal components are dumps, in order, that are to be
146 recovered, but the object presents some other interfaces that return useful
147 information about the plan.
149 The C<'dumps'> key holds the list of dumps, in the order they should be
150 performed. Callers should shift dumps off this list to present to the Clerk.
152 To get a list of volumes that the plan requires, in order, use
153 C<get_volume_list>. Each volume is represented as a hash:
155 { label => 'DATA182', available => 1 }
157 where C<available> is false if the planner did not find this volume in the
158 changer. Planners which do not consult the changer will have a false value for
161 Similarly, to get a list of holding files that the plan requires, in order, use
162 C<get_holding_file_list>. Each file is represented as a string giving the
163 fully qualified pathname.
167 package Amanda::Recovery::Planner;
# Package-level planning entry point: normalizes the arguments in %params,
# builds a Plan object, and hands the full %params to the Plan method that
# matches the kind of request.
#
# A lone 'dumpspec' argument is rewritten into a one-element 'dumpspecs' list,
# so the Plan methods can also read 'dumpspecs'.
176 $params{'dumpspecs'} = [ $params{'dumpspec'} ]
177 if exists $params{'dumpspec'};
# The Plan stores the caller's algorithm, changer, debug flag and
# one_dump_per_part flag under the keys algo / chg / debug / one_dump_per_part.
179 my $plan = Amanda::Recovery::Planner::Plan->new({
180 algo => $params{'algorithm'},
181 chg => $params{'changer'},
182 debug => $params{'debug'},
183 one_dump_per_part => $params{'one_dump_per_part'},
# Dispatch order: a true 'holding_file' is tested first, then a true
# 'filelist'; $plan->make_plan is the remaining path.
186 if ($params{'holding_file'}) {
187 $plan->make_holding_plan(%params);
188 } elsif ($params{'filelist'}) {
189 $plan->make_plan_from_filelist(%params);
191 $plan->make_plan(%params);
195 package Amanda::Recovery::Planner::Plan;
202 use Amanda::Device qw( :constants );
205 use Amanda::Config qw( :getconf config_dir_relative );
206 use Amanda::Debug qw( :logging );
207 use Amanda::MainLoop;
208 use Amanda::DB::Catalog;
209 use Amanda::Tapelist;
# Constructor tail: settle the instance's debug level, then bless the hash.
# The global $Amanda::Config::debug_recovery replaces the caller's 'debug'
# value when that value is undefined or numerically lower than the global.
215 $self->{'debug'} = $Amanda::Config::debug_recovery
216 if not defined $self->{'debug'}
217 or $Amanda::Config::debug_recovery > $self->{'debug'};
# The hash in $self becomes the Plan object itself.
219 return bless($self, $class);
# Catalog-driven planning (presumably the body of the Plan's make_plan method):
# fetch every dump matching $params{'dumpspecs'}, keep one copy of each dump,
# order the survivors, store them in $self->{'dumps'}, and pass plan_cb to
# Amanda::MainLoop::call_later with the arguments (undef, $self).
#
# Required parameters: changer, plan_cb, dumpspecs (croaks if one is absent).
#
# NOTE(review): `for my $x qw(...)` without enclosing parentheses is rejected
# by perl 5.18 and later; it needs to become `for my $x (qw(...))`.  The
# croak message below also misspells "missing".
226 for my $rq_param qw(changer plan_cb dumpspecs) {
227 croak "required parameter '$rq_param' mising"
228 unless exists $params{$rq_param};
230 my $dumpspecs = $params{'dumpspecs'};
232 # first, get the set of dumps that match these dumpspecs
233 my @dumps = Amanda::DB::Catalog::get_dumps(dumpspecs => $dumpspecs);
235 # now "bin" those by host/disk/dump_ts/level
# The bin key joins the four fields with NUL bytes; each bin holds an array of
# the catalog entries that share those four values.
237 for my $dump (@dumps) {
238 my $k = join("\0", $dump->{'hostname'}, $dump->{'diskname'},
239 $dump->{'dump_timestamp'}, $dump->{'level'});
240 $dumps{$k} = [] unless exists $dumps{$k};
241 push @{$dumps{$k}}, $dump;
244 # now select the "best" of each set of dumps, and put that in @dumps
246 for my $options (values %dumps) {
# work on a copy so narrowing the candidates does not modify the bin itself
247 my @options = @$options;
248 # if there's only one option, the choice is easy
250 push @dumps, $options[0];
254 # if there are several, narrow to those with an OK status or barring that,
255 # those with a PARTIAL status. FAIL need not apply.
256 my @ok_options = grep { $_->{'status'} eq 'OK' } @options;
257 my @partial_options = grep { $_->{'status'} eq 'PARTIAL' } @options;
260 @options = @ok_options;
262 @options = @partial_options;
265 # now, take the one written longest ago - this gets us the dump on secondary
266 # media if it hasn't been overwritten, otherwise the dump on tertiary media,
267 # etc. Note that this also prefers dumps on holding disk, since they are
268 # tagged with a write_timestamp of 0
269 @options = Amanda::DB::Catalog::sort_dumps(['write_timestamp'], @options);
270 push @dumps, $options[0];
273 # at this point we have exactly one instance of each dump in @dumps.
275 # If one_dump_per_part was specified, rearrange @dumps to have a distinct
276 # dump object for each part.
277 if ($self->{'one_dump_per_part'}) {
278 @dumps = $self->split_dumps_per_part(\@dumps);
281 # now sort the dumps in order by their constituent parts. This sorts based
282 # on write_timestamp, then on the label of the first part of the dump,
283 # using the tapelist to order the labels. Where labels match, it sorts on
284 # the part's filenum. This should sort the dumps into the order in which
285 # they were written, with holding dumps coming in at the head of the list.
286 my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
287 my $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
# primary key: write_timestamp, compared as a string (cmp)
294 if ($rv = $a->{'write_timestamp'} cmp $b->{'write_timestamp'});
296 # above will take care of comparing a holding dump to an on-media dump, but
297 # if both are on holding then we need to compare them lexically
# only the part at index 1 of each dump is consulted from here on
298 if (exists $a->{'parts'}[1]{'holding_file'}
299 and exists $b->{'parts'}[1]{'holding_file'}) {
300 return $a->{'parts'}[1]{'holding_file'} cmp $b->{'parts'}[1]{'holding_file'};
303 my ($alabel, $blabel) = (
304 $a->{'parts'}[1]{'label'},
305 $b->{'parts'}[1]{'label'},
# look up each label's tapelist entry; a position stays unset when the lookup
# returns nothing
309 $apos = $tle->{'position'}
310 if (($tle = $tapelist->lookup_tapelabel($alabel)));
311 $bpos = $tle->{'position'}
312 if (($tle = $tapelist->lookup_tapelabel($blabel)));
313 return ($bpos <=> $apos) # note: reversed for "oldest to newest"
314 if defined $bpos && defined $apos && ($bpos <=> $apos);
316 # if a tape wasn't in the tapelist, just sort the labels lexically (this
317 # really shouldn't happen)
318 if (!defined $bpos || !defined $apos) {
319 return $alabel cmp $blabel
320 if defined $alabel and defined $blabel and $alabel cmp $blabel ;
323 # finally, the dumps are on the same volume, so just sort by filenum
324 return $a->{'parts'}[1]{'filenum'} <=> $b->{'parts'}[1]{'filenum'};
326 @dumps = sort $sortfn @dumps;
# publish the ordered list on the plan object
328 $self->{'dumps'} = \@dumps;
# success path: undef in the error slot, the plan object as the second argument
330 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# Build a single-dump plan for the holding file named by
# $params{'holding_file'}.
#
# Required parameters: holding_file, plan_cb (croaks if one is absent).
# Optional: dumpspec, which is added to the catalog query when true.
#
# Failures are reported by calling plan_cb directly with one error string.
# On success $self->{'dumps'} holds one dump and plan_cb is passed to
# Amanda::MainLoop::call_later with the arguments (undef, $self).
333 sub make_holding_plan {
# NOTE(review): `for my $x qw(...)` without enclosing parentheses is rejected
# by perl 5.18 and later; it needs to become `for my $x (qw(...))`.  The
# croak message below also misspells "missing".
337 for my $rq_param qw(holding_file plan_cb) {
338 croak "required parameter '$rq_param' mising"
339 unless exists $params{$rq_param};
342 # This is a little tricky. The idea is to open up the holding file and
343 # read its header, then find that dump in the catalog. This may seem like
344 # the long way around, but it adds an extra layer of security to the
345 # recovery process, as it prevents recovery from arbitrary files on the
346 # filesystem that are not under a recognized holding directory.
348 my $hdr = Amanda::Holding::get_header($params{'holding_file'});
# reject a missing header, or one whose type is not F_DUMPFILE
349 if (!$hdr or $hdr->{'type'} != $Amanda::Header::F_DUMPFILE) {
350 return $params{'plan_cb'}->(
351 "could not open '$params{holding_file}': missing or not a holding file");
354 # look up this holding file in the catalog, adding the dumpspec we were
355 # given so that get_dumps will compare against it for us.
# the header fields datestamp / name / disk / dumplevel supply the lookup values
356 my $dump_timestamp = $hdr->{'datestamp'};
357 my $hostname = $hdr->{'name'};
358 my $diskname = $hdr->{'disk'};
359 my $level = $hdr->{'dumplevel'};
360 my @dumps = Amanda::DB::Catalog::get_dumps(
361 $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
362 dump_timestamp => $dump_timestamp,
363 hostname => $hostname,
364 diskname => $diskname,
370 return $params{'plan_cb'}->(
371 "Specified holding file does not match dumpspec");
374 # this would be weird..
375 $self->dbg("got multiple dumps from Amanda::DB::Catalog for a holding file!")
378 # arbitrarily keep the first dump if we got several
379 $self->{'dumps'} = [ $dumps[0] ];
# success path: undef in the error slot, the plan object as the second argument
381 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# Build a single-dump plan from an explicit list of volume labels and file
# numbers.
#
# Required parameters: filelist, plan_cb (croaks if one is absent).
# 'filelist' is an array reference consumed pairwise: a label followed by an
# array reference of file numbers.  Optional: dumpspec, which is added to the
# parts query when true.
#
# Failures are reported by calling plan_cb directly with one error string.
# On success $self->{'dumps'} holds one dump and plan_cb is passed to
# Amanda::MainLoop::call_later with the arguments (undef, $self).
384 sub make_plan_from_filelist {
# NOTE(review): `for my $x qw(...)` without enclosing parentheses is rejected
# by perl 5.18 and later; it needs to become `for my $x (qw(...))`.  The
# croak message below also misspells "missing".
388 for my $rq_param qw(filelist plan_cb) {
389 croak "required parameter '$rq_param' mising"
390 unless exists $params{$rq_param};
393 # This is similarly tricky - in this case, we search for dumps matching
394 # both the dumpspec and the labels, filter that down to just the parts we
395 # want, and then check that only one dump remains. Then we look up that
# copy the list so the shifts below do not consume the caller's array
400 my @filelist = @{$params{'filelist'}};
# @labels records the labels in the order given; %files maps each label to its
# array of file numbers
402 my $label = shift @filelist;
403 push @labels, $label;
404 $files{$label} = shift @filelist;
407 my @parts = Amanda::DB::Catalog::get_parts(
408 $params{'dumpspec'}? (dumpspecs => [ $params{'dumpspec'} ]) : (),
409 labels => [ @labels ]);
411 # filter down to the parts that match filelist (using %files)
# the closure reads the current part from $_ and looks for its filenum among
# the file numbers recorded for its label
412 my $in_filelist = sub {
413 my $filenum = $_->{'filenum'};
414 grep { $_ == $filenum } @{$files{$_->{'label'}}};
# NOTE(review): `grep EXPR, LIST` evaluates the expression `$in_filelist`
# itself, and a code reference is always true, so the closure is never called
# and every part is kept.  `grep { $in_filelist->() } @parts` looks like the
# intent -- confirm before changing.
416 @parts = grep $in_filelist, @parts;
418 # extract the dumps, using a hash to ensure uniqueness
# the hash key is the stringified dump reference, so parts that point at the
# same dump object collapse into one entry
419 my %dumps = map { my $d = $_->{'dump'}; ($d, $d) } @parts;
420 my @dumps = values %dumps;
423 return $params{'plan_cb'}->(
424 "Specified file list does not match dumpspec");
425 } elsif (@dumps > 1) {
426 return $params{'plan_cb'}->(
427 "Specified file list matches multiple dumps; cannot continue recovery");
430 # now, because of the weak linking used by Amanda::DB::Catalog, we need to
431 # re-query for this dump. If we don't do this, the parts will all be
432 # garbage-collected when we hand back the plan. This is, charitably, "less than
433 # ideal". Note that this has the side-effect of filling in any parts of the
434 # dump that were missing from the filelist.
# NOTE(review): this query reads $params{'dumpspecs'} (plural) while the parts
# query above reads $params{'dumpspec'}; the plural key is only set here when
# the caller supplied it -- verify against callers.
435 @dumps = Amanda::DB::Catalog::get_dumps(
436 hostname => $dumps[0]->{'hostname'},
437 diskname => $dumps[0]->{'diskname'},
438 level => $dumps[0]->{'level'},
439 dump_timestamp => $dumps[0]->{'dump_timestamp'},
440 write_timestamp => $dumps[0]->{'write_timestamp'},
441 dumpspecs => $params{'dumpspecs'});
445 $self->{'dumps'} = [ $dumps[0] ];
# success path: undef in the error slot, the plan object as the second argument
447 Amanda::MainLoop::call_later($params{'plan_cb'}, undef, $self);
# Expand a list of dumps into one dump object per part.  For every defined
# part of every dump in @$dumps, a copy of the dump is made whose 'parts' list
# holds only a copy of that part (at index 1), with 'single_part' set to 1.
# The copies are collected in @new_dumps.
450 sub split_dumps_per_part {
456 for my $dump (@$dumps) {
457 for my $part (@{$dump->{'parts'}}) {
458 my ($newdump, $newpart);
# undefined slots in the parts list are skipped
461 next unless defined $part;
463 # shallow copy the dump and part objects
464 $newdump = do { my %t = %$dump; \%t; };
465 $newpart = do { my %t = %$part; \%t; };
467 # overwrite the interlinking
# the copies point at each other; index 0 of the new parts list is left undef
468 $newpart->{'dump'} = $newdump;
469 $newdump->{'parts'} = [ undef, $newpart ];
471 $newdump->{'single_part'} = 1;
473 push @new_dumps, $newdump;
# Collect the volumes named by the plan's parts, walking $self->{'dumps'} and
# each dump's parts in order.  Every entry pushed onto @volumes is a hash
# { label => ..., available => 0 }; 'available' is always 0 in this code.
# A label is recorded again only when it differs from the label recorded
# immediately before it, so only adjacent repeats are collapsed.
480 sub get_volume_list {
485 for my $dump (@{$self->{'dumps'}}) {
486 for my $part (@{$dump->{'parts'}}) {
487 next unless defined $part; # skip parts[0]
488 next unless defined $part->{'label'}; # skip holding parts
489 if (!defined $last_label || $part->{'label'} ne $last_label) {
490 $last_label = $part->{'label'};
491 push @volumes, { label => $last_label, available => 0 };
# Collect the 'holding_file' value of every part in the plan that has one,
# walking $self->{'dumps'} and each dump's parts in order.  The values are
# pushed onto @hfiles as-is, with no de-duplication.
499 sub get_holding_file_list {
503 for my $dump (@{$self->{'dumps'}}) {
504 for my $part (@{$dump->{'parts'}}) {
505 next unless defined $part; # skip parts[0]
506 next unless defined $part->{'holding_file'}; # skip on-media dumps
507 push @hfiles, $part->{'holding_file'};
# Debug helper (presumably the dbg method that make_holding_plan calls):
# writes $msg through debug(), prefixed with the package name, but only when
# the plan's 'debug' value is true.
515 my ($self, $msg) = @_;
516 if ($self->{'debug'}) {
517 debug("Amanda::Recovery::Planner: $msg");