git.gag.com Git - debian/amanda/blob - perl/Amanda/DB/Catalog.pm

   1 # Copyright (c) 2008, 2009, 2010 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License version 2 as published
   5 # by the Free Software Foundation.
   6 #
   7 # This program is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  10 # for more details.
  11 #
  12 # You should have received a copy of the GNU General Public License along
  13 # with this program; if not, write to the Free Software Foundation, Inc.,
  14 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  15 #
  16 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
  17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  18
  19 package Amanda::DB::Catalog;
  20
  21 =head1 NAME
  22
  23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
  24
  25 =head1 SYNOPSIS
  26
  27   use Amanda::DB::Catalog;
  28
   29   # get all write timestamps on record
   30   my @timestamps = Amanda::DB::Catalog::get_write_timestamps();
   31
   32   # loop over those timestamps, printing dump info for each part
   33   for my $timestamp (@timestamps) {
   34       my @dumpfiles = Amanda::DB::Catalog::get_parts(
   35           write_timestamp => $timestamp,
   36           status => 'OK'
   37       );
   38       print "$timestamp:\n";
   39       for my $dumpfile (@dumpfiles) {
   40           print " ", $dumpfile->{dump}{hostname}, ":", $dumpfile->{dump}{diskname},
   41                 " level ", $dumpfile->{dump}{level}, "\n";
   42       }
   43   }
  44
  45 =head1 MODEL
  46
  47 The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
  48 a complete bytestream received from an application, and is uniquely identified
  49 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
  50 and C<write_timestamp>.  A dump may be partial, or even a complete failure.
  51
  52 A part corresponds to a single file on a volume, containing a portion of the
  53 data for a dump.  A part, then, is completely specified by a volume label and a
  54 file number (C<filenum>).  Each part has, among other things, a part number
  55 (C<partnum>) which gives its relative position within the dump.  The bytestream
  56 for a dump is recovered by concatenating all of the successful (C<status> = OK)
  57 parts matching the dump.
  58
  59 Files in the holding disk are considered part of the catalog, and are
  60 represented as single-part dumps (holding-disk chunking is ignored, as it is
  61 distinct from split parts).
  62
  63 =head2 DUMPS
  64
  65 The dump table contains one row per dump.  It has the following columns:
  66
  67 =over
  68
  69 =item dump_timestamp
  70
  71 (string) -- timestamp of the run in which the dump was created
  72
  73 =item write_timestamp
  74
  75 (string) -- timestamp of the run in which the part was written to this volume,
  76 or C<"00000000000000"> for dumps in the holding disk.
  77
  78 =item hostname
  79
  80 (string) -- dump hostname
  81
  82 =item diskname
  83
  84 (string) -- dump diskname
  85
  86 =item level
  87
  88 (integer) -- dump level
  89
  90 =item status
  91
  92 (string) -- "OK", "PARTIAL", or "FAIL"
  93
  94 =item message
  95
  96 (string) -- reason for PARTIAL or FAIL status
  97
  98 =item nparts
  99
 100 (integer) -- number of successful parts in this dump
 101
 102 =item kb
 103
  104 (integer) -- size (in kb) of this dump
 105
 106 =item orig_kb
 107
  108 (integer) -- size (in kb) of the complete dump (uncompressed and unencrypted).
 109
 110 =item sec
 111
  112 (integer) -- time (in seconds) spent writing this dump
 113
 114 =item parts
 115
 116 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
 117 always C<undef>).  When multiple partial parts are available, the choice of the
 118 partial that is included in this array is undefined.
 119
 120 =back
 121
 122 A dump is represented as a hashref with these keys.
 123
 124 The C<write_timestamp> gives the time of the amanda run in which the part was
 125 written to this volume.  The C<write_timestamp> may differ from the
 126 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
 127 initial dump.
 128
 129 =head2 PARTS
 130
 131 The parts table contains one row per part, and has the following columns:
 132
 133 =over
 134
 135 =item label
 136
 137 (string) -- volume label (not present for holding files)
 138
 139 =item filenum
 140
 141 (integer) -- file on that volume (not present for holding files)
 142
 143 =item holding_file
 144
 145 (string) -- fully-qualified pathname of the holding file (not present for
 146 on-media dumps)
 147
 148 =item dump
 149
 150 (object ref) -- a reference to the dump containing this part
 151
 152 =item status
 153
 154 (string) -- "OK", "PARTIAL" or some other descriptor
 155
 156 =item partnum
 157
 158 (integer) -- part number of a split part (1-based)
 159
 160 =item kb
 161
 162 (integer) -- size (in kb) of this part
 163
 164 =item sec
 165
 166 (integer) -- time (in seconds) spent writing this part
 167
 168 =back
 169
 170 A part is represented as a hashref with these keys.  The C<label> and
 171 C<filenum> serve as a primary key.
 172
  173 Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
  174 broken by making the C<parts> array's contents weak references in C<get_parts>,
  175 and the C<dump> reference weak in C<get_dumps>.
 176
 177 =head2 NOTES
 178
 179 All timestamps used in this module are full-length, in the format
 180 C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
 181 zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
 182 corresponds to the initiation of the I<original> dump run, while
 183 C<write_timestamp> gives the time the file was written to the volume.  When
 184 parts are migrated from volume to volume (e.g., by I<amvault>), the
 185 C<dump_timestamp> does not change.
 186
 187 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
 188 serves as a unique identifier for a dump bytestream, but because the bytestream
 189 may appear several times in the catalog (due to vaulting) the additional
 190 C<write_timestamp> is required to identify a particular on-storage instance of
 191 a dump.  Note that the part sizes may differ between instances, so it is not
 192 valid to concatenate parts from different dump instances.
 193
 194 =head1 INTERFACES
 195
 196 =head2 SUMMARY DATA
 197
 198 The following functions provide summary data based on the contents of the
 199 catalog.
 200
 201 =over
 202
 203 =item get_write_timestamps()
 204
 205 Get a list of all write timestamps, sorted in chronological order.
 206
 207 =item get_latest_write_timestamp()
 208
 209 Return the most recent write timestamp.
 210
 211 =item get_labels_written_at_timestamp($ts)
 212
 213 Return a list of labels for volumes written at the given timestamp.
 214
 215 =back
 216
 217 =head2 PARTS
 218
 219 =over
 220
 221 =item get_parts(%parameters)
 222
 223 This function returns a sequence of parts.  Values in C<%parameters> restrict
 224 the set of parts that are returned.  The hash can have any of the following
 225 keys:
 226
 227 =over
 228
 229 =item write_timestamp
 230
 231 restrict to parts written at this timestamp
 232
 233 =item write_timestamps
 234
 235 (arrayref) restrict to parts written at any of these timestamps (note that
 236 holding-disk files have no C<write_timestamp>, so this option and the previous
 237 will omit them)
 238
 239 =item dump_timestamp
 240
 241 restrict to parts with exactly this timestamp
 242
 243 =item dump_timestamps
 244
 245 (arrayref) restrict to parts with any of these timestamps
 246
 247 =item dump_timestamp_match
 248
 249 restrict to parts with timestamps matching this expression
 250
 251 =item holding
 252
 253 if true, only return dumps on holding disk.  If false, omit dumps on holding
 254 disk.
 255
 256 =item hostname
 257
 258 restrict to parts with exactly this hostname
 259
 260 =item hostnames
 261
 262 (arrayref) restrict to parts with any of these hostnames
 263
 264 =item hostname_match
 265
 266 restrict to parts with hostnames matching this expression
 267
 268 =item diskname
 269
 270 restrict to parts with exactly this diskname
 271
 272 =item disknames
 273
 274 (arrayref) restrict to parts with any of these disknames
 275
 276 =item diskname_match
 277
 278 restrict to parts with disknames matching this expression
 279
 280 =item label
 281
 282 restrict to parts with exactly this label
 283
 284 =item labels
 285
 286 (arrayref) restrict to parts with any of these labels
 287
 288 =item level
 289
 290 restrict to parts with exactly this level
 291
 292 =item levels
 293
 294 (arrayref) restrict to parts with any of these levels
 295
 296 =item status
 297
 298 restrict to parts with this status
 299
 300 =item dumpspecs
 301
  302 (arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
 303
 304 =back
 305
 306 Match expressions are described in the amanda(8) manual page.
 307
 308 =item sort_parts([ $key1, $key2, .. ], @parts)
 309
 310 Given a list of parts, this function sorts that list by the requested keys.
 311 The following keys are available:
 312
 313 =over
 314
 315 =item hostname
 316
 317 =item diskname
 318
 319 =item write_timestamp
 320
 321 =item dump_timestamp
 322
 323 =item level
 324
 325 =item filenum
 326
 327 =item label
 328
 329 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
 330
 331 =item partnum
 332
 333 =item nparts
 334
 335 =back
 336
 337 Keys are processed from left to right: if two dumps have the same value for
 338 C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
 339 dash (C<->) to reverse the order.
 340
 341 Note that some of these keys are dump keys; the function will automatically
 342 access those values via the C<dump> attribute.
 343
 344 =back
 345
 346 =head2 DUMPS
 347
 348 =over
 349
 350 =item get_dumps(%parameters)
 351
 352 This function returns a sequence of dumps.  Values in C<%parameters> restrict
 353 the set of dumps that are returned.  The same keys as are used for C<get_parts>
 354 are available here, with the exception of C<label> and C<labels>.  The
 355 C<status> key applies to the dump status, not the status of its constituent
 356 parts.
 357
 358 =item sort_dumps([ $key1, $key2 ], @dumps)
 359
 360 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
 361 The same keys are available, with the exception of C<label>, C<filenum>, and
 362 C<partnum>.
 363
 364 =back
 365
 366 =head2 ADDING DATA
 367
 368 =over
 369
 370 =item add_part($part)
 371
 372 Add the given part to the database.  In terms of logfiles, this will either
 373 create a new logfile (if the part's C<write_timestamp> has not been seen
 374 before) or append to an existing logfile.  Note that a new logfile will require
 375 a corresponding new entry in the tapelist.
 376
 377 Note that no locking is performed: multiple simultaneous calls to this function
 378 can result in a corrupted or incorrect logfile.
 379
 380 TODO: add_dump
 381
 382 =back
 383
 384 =cut
 385
 386 use Amanda::Logfile qw( :constants match_disk match_host
 387                         match_datestamp match_level );
 388 use Amanda::Tapelist;
 389 use Amanda::Config qw( :init :getconf config_dir_relative );
 390 use Amanda::Util qw( quote_string weaken_ref );
 391 use warnings;
 392 use strict;
 393
# tapelist cache.  Both values start out undefined.
# NOTE(review): presumably filled in by _load_tapelist(), which is called
# before every find_log() below but is defined outside this view -- confirm.
my $tapelist = undef;
my $tapelist_filename = undef;
 397
 398 # utility function
 399 sub zeropad {
 400     my ($timestamp) = @_;
 401     if (length($timestamp) == 8) {
 402         return $timestamp."000000";
 403     }
 404     return $timestamp;
 405 }
 406
 407 sub get_write_timestamps {
 408     my @rv;
 409
 410     # find_log assumes that the tapelist has been loaded, so load it now
 411     _load_tapelist();
 412
 413     for (Amanda::Logfile::find_log()) {
 414         next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 415         push @rv, zeropad($timestamp);
 416     }
 417
 418     return sort @rv;
 419 }
 420
 421 sub get_latest_write_timestamp {
 422     # get all of the timestamps and select the last one
 423     my @timestamps = get_write_timestamps();
 424
 425     if (@timestamps) {
 426         return $timestamps[-1];
 427     }
 428
 429     return undef;
 430 }
 431
 432 # this generic function implements the loop of scanning logfiles to find
 433 # the requested data; get_parts and get_dumps then adjust the results to
 434 # match what the user expects.
 435 sub get_parts_and_dumps {
 436     my $get_what = shift; # "parts" or "dumps"
 437     my %params = @_;
 438     my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
 439
 440     # find_log assumes that the tapelist has been loaded, so load it now
 441     _load_tapelist();
 442
 443     # pre-process params by appending all of the "singular" parameters to the "plurals"
 444     push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
 445         if exists($params{'write_timestamp'});
 446     push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
 447         if exists($params{'dump_timestamp'});
 448     push @{$params{'hostnames'}}, $params{'hostname'}
 449         if exists($params{'hostname'});
 450     push @{$params{'disknames'}}, $params{'diskname'}
 451         if exists($params{'diskname'});
 452     push @{$params{'levels'}}, $params{'level'}
 453         if exists($params{'level'});
 454     if ($get_what eq 'parts') {
 455         push @{$params{'labels'}}, $params{'label'}
 456             if exists($params{'label'});
 457     } else {
 458         delete $params{'labels'};
 459     }
 460
 461     # specifying write_timestamps implies we won't check holding files
 462     if ($params{'write_timestamps'}) {
 463         if (defined $params{'holding'} and $params{'holding'}) {
 464             return [], []; # well, that's easy..
 465         }
 466         $params{'holding'} = 0;
 467     }
 468
 469     # Since we're working from logfiles, we have to pick the logfiles we'll use first.
 470     # Then we can use search_logfile.
 471     my @logfiles;
 472     if ($params{'holding'}) {
 473         @logfiles = ( 'holding', );
 474     } elsif (exists($params{'write_timestamps'})) {
 475         # if we have specific write_timestamps, the job is pretty easy.
 476         my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
 477         for my $logfile (Amanda::Logfile::find_log()) {
 478             next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 479             next unless (exists($timestamps_hash{zeropad($timestamp)}));
 480             push @logfiles, $logfile;
 481         }
 482     } elsif (exists($params{'dump_timestamps'})) {
 483         # otherwise, we need only look in logfiles at or after the earliest dump timestamp
 484         my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
 485         my $earliest_timestamp = $sorted_timestamps[0];
 486         for my $logfile (Amanda::Logfile::find_log()) {
 487             next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 488             next unless (zeropad($timestamp) ge $earliest_timestamp);
 489             push @logfiles, $logfile;
 490         }
 491     } else {
 492         # oh well -- it looks like we'll have to read all existing logfiles.
 493         @logfiles = Amanda::Logfile::find_log();
 494     }
 495
 496     # Set up some hash tables for speedy lookups of various attributes
 497     my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
 498     %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
 499         if (exists($params{'dump_timestamps'}));
 500     %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
 501         if (exists($params{'hostnames'}));
 502     %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
 503         if (exists($params{'disknames'}));
 504     %levels_hash = map { ($_, undef) } @{$params{'levels'}}
 505         if (exists($params{'levels'}));
 506     %labels_hash = map { ($_, undef) } @{$params{'labels'}}
 507         if (exists($params{'labels'}));
 508
 509     my %dumps;
 510     my @parts;
 511
 512     # *also* scan holding if the holding param wasn't specified
 513     if (!exists $params{'holding'}) {
 514         push @logfiles, 'holding';
 515     }
 516
 517     # now loop over those logfiles and use search_logfile to load the dumpfiles
 518     # from them, then process each entry from the logfile
 519     for my $logfile (@logfiles) {
 520         my (@find_results, $write_timestamp);
 521
 522         # get the raw contents from search_logfile, or use holding if
 523         # $logfile is undef
 524         if ($logfile ne 'holding') {
 525             @find_results = Amanda::Logfile::search_logfile(undef, undef,
 526                                                         "$logfile_dir/$logfile", 1);
 527             # convert to dumpfile hashes, including the write_timestamp from the logfile name
 528             my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
 529             $write_timestamp = zeropad($timestamp);
 530
 531         } else {
 532             @find_results = Amanda::Logfile::search_holding_disk();
 533             $write_timestamp = '00000000000000';
 534         }
 535
 536         # filter against *_match with dumps_match
 537         @find_results = Amanda::Logfile::dumps_match([@find_results],
 538             exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
 539             exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
 540             exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
 541             undef,
 542             0);
 543
 544         # loop over each entry in the logfile.
 545         for my $find_result (@find_results) {
 546
 547             # filter out the non-dump error messages that find.c produces
 548             next unless (defined $find_result->{'label'});
 549
 550             # bail out on this result early, if possible
 551             next if (%dump_timestamps_hash
 552                 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
 553             next if (%hostnames_hash
 554                 and !exists($hostnames_hash{$find_result->{'hostname'}}));
 555             next if (%disknames_hash
 556                 and !exists($disknames_hash{$find_result->{'diskname'}}));
 557             next if (%levels_hash
 558                 and !exists($levels_hash{$find_result->{'level'}}));
 559             next if (%labels_hash
 560                 and !exists($labels_hash{$find_result->{'label'}}));
 561             if ($get_what eq 'parts') {
 562                 next if (exists($params{'status'})
 563                     and $find_result->{'status'} ne $params{'status'});
 564             }
 565
 566             # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
 567             # tendency to produce duplicate results
 568             next if ($params{'dumpspecs'}
 569                 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
 570                                                     $params{'dumpspecs'}, 0));
 571
 572             my $dump_timestamp = zeropad($find_result->{'timestamp'});
 573
 574             my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
 575                                      $write_timestamp, $find_result->{'level'});
 576             my $dump = $dumps{$dumpkey};
 577             if (!defined $dump) {
 578                 $dump = $dumps{$dumpkey} = {
 579                     dump_timestamp => $dump_timestamp,
 580                     write_timestamp => $write_timestamp,
 581                     hostname => $find_result->{'hostname'},
 582                     diskname => $find_result->{'diskname'},
 583                     level => $find_result->{'level'}+0,
 584                     orig_kb => $find_result->{'orig_kb'},
 585                     status => $find_result->{'dump_status'},
 586                     message => $find_result->{'message'},
 587                     # the rest of these params are unknown until we see a taper
 588                     # DONE, PARTIAL, or FAIL line, although we count nparts
 589                     # manually instead of relying on the logfile
 590                     nparts => 0,
 591                     kb => -1,
 592                     sec => -1,
 593                 };
 594             }
 595
 596             # start setting up a part hash for this result
 597             my %part;
 598             if ($logfile ne 'holding') {
 599                 # on-media dump
 600                 %part = (
 601                     label => $find_result->{'label'},
 602                     filenum => $find_result->{'filenum'},
 603                     dump => $dump,
 604                     status => $find_result->{'status'},
 605                     sec => $find_result->{'sec'},
 606                     kb => $find_result->{'kb'},
 607                     orig_kb => $find_result->{'orig_kb'},
 608                     partnum => $find_result->{'partnum'},
 609                 );
 610             } else {
 611                 # holding disk
 612                 %part = (
 613                     holding_file => $find_result->{'label'},
 614                     dump => $dump,
 615                     status => $find_result->{'status'},
 616                     sec => 0.0,
 617                     kb => $find_result->{'kb'},
 618                     orig_kb => $find_result->{'orig_kb'},
 619                     partnum => 1,
 620                 );
 621                 # and fix up the dump, too
 622                 $dump->{'status'} = $find_result->{'status'};
 623                 $dump->{'kb'} = $find_result->{'kb'};
 624                 $dump->{'sec'} = $find_result->{'sec'};
 625             }
 626
 627             # weaken the dump ref if we're returning dumps
 628             weaken_ref($part{'dump'})
 629                 if ($get_what eq 'dumps');
 630
 631             # count the number of successful parts in the dump
 632             $dump->{'nparts'}++ if $part{'status'} eq 'OK';
 633
 634             # and add a ref to the array of parts; if we're getting
 635             # parts, then this is a weak ref
 636             $dump->{'parts'}[$part{'partnum'}] = \%part;
 637             weaken_ref($dump->{'parts'}[$part{'partnum'}])
 638                 if ($get_what eq 'parts');
 639
 640             push @parts, \%part;
 641         }
 642
 643         # if these dumps were on the holding disk, then we're done
 644         next if $logfile eq 'holding';
 645
 646         # re-read the logfile to extract dump-level info that's not captured by
 647         # search_logfile
 648         my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
 649         die "logfile '$logfile' not found" unless $logh;
 650         while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
 651             next unless $prog == $P_TAPER;
 652             my $status;
 653             if ($type == $L_DONE) {
 654                 $status = 'OK';
 655             } elsif ($type == $L_PARTIAL) {
 656                 $status = 'PARTIAL';
 657             } elsif ($type == $L_FAIL) {
 658                 $status = 'FAIL';
 659             } else {
 660                 next;
 661             }
 662
 663             # now extract the appropriate info; luckily these log lines have the same
 664             # format, more or less
 665             my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $message);
 666             ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
 667             ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
 668             ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
 669             if ($status ne 'FAIL') {
 670                 ($nparts, $str) = Amanda::Util::skip_quoted_string($str);
 671             } else {
 672                 $nparts = 0;
 673             }
 674             ($level, $str) = Amanda::Util::skip_quoted_string($str);
 675             if ($status ne 'FAIL') {
 676                 my $s = $str;
 677                 ($secs, $kb, $str) = ($str =~ /^\[sec ([0-9.]+) kb (\d+) .*\] ?(.*)$/)
 678                     or die("'$s'");
 679             }
 680             if ($status ne 'OK') {
 681                 $message = $str;
 682             } else {
 683                 $message = '';
 684             }
 685
 686             $hostname = Amanda::Util::unquote_string($hostname);
 687             $diskname = Amanda::Util::unquote_string($diskname);
 688             $message = Amanda::Util::unquote_string($message) if $message;
 689
 690             # filter against dump criteria
 691             next if ($params{'dump_timestamp_match'}
 692                 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
 693             next if (%dump_timestamps_hash
 694                 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
 695
 696             next if ($params{'hostname_match'}
 697                 and !match_host($params{'hostname_match'}, $hostname));
 698             next if (%hostnames_hash
 699                 and !exists($hostnames_hash{$hostname}));
 700
 701             next if ($params{'diskname_match'}
 702                 and !match_disk($params{'diskname_match'}, $diskname));
 703             next if (%disknames_hash
 704                 and !exists($disknames_hash{$diskname}));
 705
 706             next if (%levels_hash
 707                 and !exists($levels_hash{$level}));
 708             # get_dumps filters on status
 709
 710             if ($params{'dumpspecs'}) {
 711                 my $ok = 0;
 712                 for my $ds (@{$params{'dumpspecs'}}) {
 713                     # (the "". are for SWIG's benefit - SWIGged functions don't like
 714                     # strings generated by SWIG.  Long story.)
 715                     next if (defined $ds->{'host'}
 716                             and !match_host("".$ds->{'host'}, $hostname));
 717                     next if (defined $ds->{'disk'}
 718                             and !match_disk("".$ds->{'disk'}, $diskname));
 719                     next if (defined $ds->{'datestamp'}
 720                             and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
 721                     next if (defined $ds->{'level'}
 722                             and !match_level("".$ds->{'level'}, $level));
 723
 724                     $ok = 1;
 725                     last;
 726                 }
 727                 next unless $ok;
 728             }
 729
 730             my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp, $level);
 731             my $dump = $dumps{$dumpkey};
 732             if (!defined $dump) {
 733                 # this will happen when a dump has no parts - a FAILed dump.
 734                 $dump = $dumps{$dumpkey} = {
 735                     dump_timestamp => $dump_timestamp,
 736                     write_timestamp => $write_timestamp,
 737                     hostname => $hostname,
 738                     diskname => $diskname,
 739                     level => $level+0,
 740                     nparts => $nparts, # hopefully 0?
 741                 };
 742             }
 743
 744             $dump->{'message'} = $message;
 745             if ($status eq 'FAIL') {
 746                 $dump->{'kb'} = 0;
 747                 $dump->{'sec'} = 0.0;
 748             } else {
 749                 $dump->{'kb'} = $kb+0;
 750                 $dump->{'sec'} = $secs+0.0;
 751             }
 752         }
 753         Amanda::Logfile::close_logfile($logh);
 754     }
 755
 756     return [ values %dumps], \@parts;
 757 }
 758
 759 sub get_parts {
 760     my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
 761     return @$parts;
 762 }
 763
 764 sub get_dumps {
 765     my %params = @_;
 766     my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
 767     my @dumps = @$dumps;
 768
 769     if (exists $params{'status'}) {
 770         @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
 771     }
 772
 773     return @dumps;
 774 }
 775
 776 sub sort_parts {
 777     my ($keys, @parts) = @_;
 778
 779     # TODO: make this more efficient by selecting the comparison
 780     # functions once, in advance, and just applying them
 781     return sort {
 782         my $res;
 783         for my $key (@$keys) {
 784             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 785
 786             if ($k =~ /^(partnum|filenum)$/) {
 787                 # compare part components numerically
 788                 $res = $a->{$k} <=> $b->{$k};
 789             } elsif ($k =~ /^(nparts|level)$/) {
 790                 # compare dump components numerically
 791                 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
 792             } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
 793                 # compare dump components alphabetically
 794                 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
 795             } else { # (label)
 796                 # compare part components alphabetically
 797                 $res = $a->{$k} cmp $b->{$k};
 798             }
 799             $res = -$res if ($rev eq '-' and $res);
 800             return $res if $res;
 801         }
 802         return 0;
 803     } @parts;
 804 }
 805
 806 sub sort_dumps {
 807     my ($keys, @dumps) = @_;
 808
 809     # TODO: make this more efficient by selecting the comparison
 810     # functions once, in advance, and just applying them
 811     return sort {
 812         my $res;
 813         for my $key (@$keys) {
 814             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 815
 816             if ($k =~ /^(nparts|level)$/) {
 817                 # compare dump components numerically
 818                 $res = $a->{$k} <=> $b->{$k};
 819             } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
 820                 # compare dump components alphabetically
 821                 $res = $a->{$k} cmp $b->{$k};
 822             }
 823             $res = -$res if ($rev eq '-' and $res);
 824             return $res if $res;
 825         }
 826         return 0;
 827     } @dumps;
 828 }
 829
# caches for add_part() to avoid repeatedly looking up the log
# filename for a particular write_timestamp.  Together these form a
# one-element cache: add_part() rescans the logfiles whenever the label or
# write_timestamp of the part it is given differs from the values remembered
# here, and otherwise reuses $add_part_last_logfile.
my $add_part_last_label = undef;
my $add_part_last_write_timestamp = undef;
my $add_part_last_logfile = undef;
 835
 836 sub add_part {
 837     my ($dump) = @_;
 838     my $found;
 839     my $logfh;
 840     my $logfile;
 841     my $find_result;
 842     my $logdir = getconf($CNF_LOGDIR);
 843     my ($last_filenum, $last_secs, $last_kbs);
 844
 845     # first order of business is to find out whether we need to make a new
 846     # dumpfile for this.
 847     my $write_timestamp = zeropad($dump->{'write_timestamp'});
 848     die "dump has no 'write_timestamp'" unless defined $write_timestamp;
 849
 850     # consult our one-element cache for this label and write_timestamp
 851     if (!defined $add_part_last_label
 852         or $add_part_last_label ne $dump->{'label'}
 853         or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
 854
 855         # update the cache
 856         $add_part_last_logfile = undef;
 857         LOGFILE:
 858         for my $lf (Amanda::Logfile::find_log()) {
 859             next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 860             next unless (zeropad($log_timestamp) eq $write_timestamp);
 861
 862             # write timestamp matches; now check the label
 863             LOGFILE_DUMP:
 864             for $find_result (Amanda::Logfile::search_logfile(undef, undef,
 865                                         "$logdir/$lf", 1)) {
 866                 next unless (defined $find_result->{'label'});
 867
 868                 if ($find_result->{'label'} eq $dump->{'label'}) {
 869                     $add_part_last_label = $dump->{'label'};
 870                     $add_part_last_write_timestamp = $dump->{'write_timestamp'};
 871                     $add_part_last_logfile = $lf;
 872                     last LOGFILE;
 873                 }
 874             }
 875         }
 876     }
 877     $logfile = $add_part_last_logfile;
 878
 879     # truncate the write_timestamp if we're not using timestamps
 880     if (!getconf($CNF_USETIMESTAMPS)) {
 881         $write_timestamp = substr($write_timestamp, 0, 8);
 882     }
 883
 884     # get the information on the last dump and part in this logfile, or create
 885     # a new logfile if none exists, then open the logfile for writing.
 886     if (defined $logfile) {
 887         $last_filenum = -1;
 888
 889         # NOTE: this depends on an implementation detail of search_logfile: it
 890         # returns the results in the reverse order of appearance in the logfile.
 891         # Since we're concerned with the last elements of this logfile that we
 892         # will be appending to shortly, we simply reverse this list.  As this
 893         # package is rewritten to parse logfiles on its own (or access a relational
 894         # database), this implementation detail will no longer be relevant.
 895         my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
 896                                                     "$logdir/$logfile", 1);
 897         for $find_result (@find_results) {
 898             # filter out the non-dump error messages that find.c produces
 899             next unless (defined $find_result->{'label'});
 900
 901             $last_filenum = $find_result->{'filenum'};
 902
 903             # if this is part number 1, reset our secs and kbs counters on the
 904             # assumption that this is the beginning of a new dump
 905             if ($find_result->{'partnum'} == 1) {
 906                 $last_secs = $last_kbs = 0;
 907             }
 908             $last_secs += $find_result->{'sec'};
 909             $last_kbs += $find_result->{'kb'};
 910         }
 911
 912         open($logfh, ">>", "$logdir/$logfile");
 913     } else {
 914         $last_filenum = -1;
 915         $last_secs = 0;
 916         $last_kbs = 0;
 917
 918         # pick an unused log filename
 919         my $i = 0;
 920         while (1) {
 921             $logfile = "log.$write_timestamp.$i";
 922             last unless -f "$logdir/$logfile";
 923             $i++;
 924         }
 925
 926         open($logfh, ">", "$logdir/$logfile")
 927             or die("Could not write '$logdir/$logfile': $!");
 928
 929         print $logfh
 930             "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
 931
 932         print $logfh
 933             "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
 934
 935         if (!defined $tapelist_filename) {
 936             $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
 937         }
 938
 939         # reload the tapelist immediately, in case it's been modified
 940         $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
 941
 942         # see if we need to add an entry to the tapelist for this dump
 943         if (!grep { $_->{'label'} eq $dump->{'label'}
 944                     and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
 945                 } @$tapelist) {
 946             $tapelist->add_tapelabel($write_timestamp, $dump->{'label'});
 947             $tapelist->write($tapelist_filename);
 948         }
 949     }
 950
 951     if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
 952         warn "Discontinuity in filenums in $logfile: " .
 953              "from $last_filenum to $dump->{filenum}";
 954     }
 955
 956     my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
 957
 958     my $part_line = "PART taper ";
 959     $part_line .= "$dump->{label} ";
 960     $part_line .= "$dump->{filenum} ";
 961     $part_line .= quote_string($dump->{hostname}) . " ";
 962     $part_line .= quote_string($dump->{diskname}) . " ";
 963     $part_line .= "$dump->{dump_timestamp} ";
 964     $part_line .= "$dump->{partnum}/$dump->{nparts} ";
 965     $part_line .= "$dump->{level} ";
 966     $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
 967     print $logfh "$part_line\n";
 968
 969     # TODO: we don't always know nparts when writing a part, so
 970     # this is not always an effective way to detect a complete dump.
 971     # However, it works for purposes of data vaulting.
 972     if ($dump->{'partnum'} == $dump->{'nparts'}) {
 973         my $secs = $last_secs + $dump->{'sec'};
 974         my $kbs = $last_kbs + $dump->{'kb'};
 975         $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
 976
 977         my $done_line = "DONE taper ";
 978         $done_line .= quote_string($dump->{hostname}) ." ";
 979         $done_line .= quote_string($dump->{diskname}) ." ";
 980         $done_line .= "$dump->{dump_timestamp} ";
 981         $done_line .= "$dump->{nparts} ";
 982         $done_line .= "$dump->{level} ";
 983         $done_line .= "[sec $secs kb $kbs kps $kps]";
 984         print $logfh "$done_line\n";
 985     }
 986
 987     close($logfh);
 988 }
 989
 990 sub _load_tapelist {
 991     if (!defined $tapelist) {
 992         $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
 993         $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
 994     }
 995 }
 996
 997 sub _clear_cache { # (used by installcheck)
 998     $tapelist = $tapelist_filename = undef;
 999 }
1000
1001 1;