git.gag.com Git - debian/amanda/blob - perl/Amanda/DB/Catalog.pm

   1 # Copyright (c) 2008, 2009, 2010 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License version 2 as published
   5 # by the Free Software Foundation.
   6 #
   7 # This program is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  10 # for more details.
  11 #
  12 # You should have received a copy of the GNU General Public License along
  13 # with this program; if not, write to the Free Software Foundation, Inc.,
  14 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  15 #
  16 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
  17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  18
  19 package Amanda::DB::Catalog;
  20
  21 =head1 NAME
  22
  23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
  24
  25 =head1 SYNOPSIS
  26
  27   use Amanda::DB::Catalog;
  28
  29   # get all write timestamps on record
  30   my @timestamps = Amanda::DB::Catalog::get_write_timestamps();
  31
  32   # loop over those timestamps, printing dump info for each one
  33   for my $timestamp (@timestamps) {
  34       my @dumpfiles = Amanda::DB::Catalog::get_parts(
  35           write_timestamp => $timestamp,
  36           status => 'OK'
  37       );
  38       print "$timestamp:\n";
  39       for my $dumpfile (@dumpfiles) {
  40           print " ", $dumpfile->{dump}{hostname}, ":", $dumpfile->{dump}{diskname},
  41                 " level ", $dumpfile->{dump}{level}, "\n";
  42       }
  43   }
  44
  45 =head1 MODEL
  46
  47 The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
  48 a complete bytestream received from an application, and is uniquely identified
  49 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
  50 and C<write_timestamp>.  A dump may be partial, or even a complete failure.
  51
  52 A part corresponds to a single file on a volume, containing a portion of the
  53 data for a dump.  A part, then, is completely specified by a volume label and a
  54 file number (C<filenum>).  Each part has, among other things, a part number
  55 (C<partnum>) which gives its relative position within the dump.  The bytestream
  56 for a dump is recovered by concatenating all of the successful (C<status> = OK)
  57 parts matching the dump.
  58
  59 Files in the holding disk are considered part of the catalog, and are
  60 represented as single-part dumps (holding-disk chunking is ignored, as it is
  61 distinct from split parts).
  62
  63 =head2 DUMPS
  64
  65 The dump table contains one row per dump.  It has the following columns:
  66
  67 =over
  68
  69 =item dump_timestamp
  70
  71 (string) -- timestamp of the run in which the dump was created
  72
  73 =item write_timestamp
  74
  75 (string) -- timestamp of the run in which the part was written to this volume,
  76 or C<"00000000000000"> for dumps in the holding disk.
  77
  78 =item hostname
  79
  80 (string) -- dump hostname
  81
  82 =item diskname
  83
  84 (string) -- dump diskname
  85
  86 =item level
  87
  88 (integer) -- dump level
  89
  90 =item status
  91
  92 (string) -- The status of the dump - "OK", "PARTIAL", or "FAIL".  If a disk
  93 failed to dump at all, then it is not part of the catalog and thus will not
  94 have an associated dump row.
  95
  96 =item message
  97
  98 (string) -- reason for PARTIAL or FAIL status
  99
 100 =item nparts
 101
 102 (integer) -- number of successful parts in this dump
 103
 104 =item bytes
 105
 106 (integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.
 107
 108 =item kb
 109
 110 (integer) -- size (in kb) of the dump on disk
 111
 112 =item orig_kb
 113
 114 (integer) -- size (in kb) of the complete dump (before compression or encryption); undef
 115 if not available
 116
 117 =item sec
 118
 119 (integer) -- time (in seconds) spent writing this dump
 120
 121 =item parts
 122
 123 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
 124 always C<undef>).  When multiple partial parts are available, the choice of the
 125 partial that is included in this array is undefined.
 126
 127 =back
 128
 129 A dump is represented as a hashref with these keys.
 130
 131 The C<write_timestamp> gives the time of the amanda run in which the part was
 132 written to this volume.  The C<write_timestamp> may differ from the
 133 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
 134 initial dump.
 135
 136 =head2 PARTS
 137
 138 The parts table contains one row per part, and has the following columns:
 139
 140 =over
 141
 142 =item label
 143
 144 (string) -- volume label (not present for holding files)
 145
 146 =item filenum
 147
 148 (integer) -- file on that volume (not present for holding files)
 149
 150 =item holding_file
 151
 152 (string) -- fully-qualified pathname of the holding file (not present for
 153 on-media dumps)
 154
 155 =item dump
 156
 157 (object ref) -- a reference to the dump containing this part
 158
 159 =item status
 160
 161 (string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
 162
 163 =item partnum
 164
 165 (integer) -- part number of a split part (1-based)
 166
 167 =item kb
 168
 169 (integer) -- size (in kb) of this part
 170
 171 =item sec
 172
 173 (integer) -- time (in seconds) spent writing this part
 174
 175 =back
 176
 177 A part is represented as a hashref with these keys.  The C<label> and
 178 C<filenum> serve as a primary key.
 179
 180 Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
 181 broken by making the C<parts> array's contents weak references in C<get_parts>,
 182 and the C<dump> reference weak in C<get_dumps>.
 183
 184 =head2 NOTES
 185
 186 All timestamps used in this module are full-length, in the format
 187 C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
 188 zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
 189 corresponds to the initiation of the I<original> dump run, while
 190 C<write_timestamp> gives the time the file was written to the volume.  When
 191 parts are migrated from volume to volume (e.g., by I<amvault>), the
 192 C<dump_timestamp> does not change.
 193
 194 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
 195 serves as a unique identifier for a dump bytestream, but because the bytestream
 196 may appear several times in the catalog (due to vaulting) the additional
 197 C<write_timestamp> is required to identify a particular on-storage instance of
 198 a dump.  Note that the part sizes may differ between instances, so it is not
 199 valid to concatenate parts from different dump instances.
 200
 201 =head1 INTERFACES
 202
 203 =head2 SUMMARY DATA
 204
 205 The following functions provide summary data based on the contents of the
 206 catalog.
 207
 208 =over
 209
 210 =item get_write_timestamps()
 211
 212 Get a list of all write timestamps, sorted in chronological order.
 213
 214 =item get_latest_write_timestamp()
 215
 216 Return the most recent write timestamp.
 217
 218 =item get_latest_write_timestamp(type => 'amvault')
 219 =item get_latest_write_timestamp(types => [ 'amvault', .. ])
 220
 221 Return the timestamp of the most recent dump of the given type or types.  The
 222 available types are given below for C<get_run_type>.
 223
 224 =item get_labels_written_at_timestamp($ts)
 225
 226 Return a list of labels for volumes written at the given timestamp.
 227
 228 =item get_run_type($ts)
 229
 230 Return the type of run made at the given timestamp.  The result is one of
 231 C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
 232
 233 =back
 234
 235 =head2 PARTS
 236
 237 =over
 238
 239 =item get_parts(%parameters)
 240
 241 This function returns a sequence of parts.  Values in C<%parameters> restrict
 242 the set of parts that are returned.  The hash can have any of the following
 243 keys:
 244
 245 =over
 246
 247 =item write_timestamp
 248
 249 restrict to parts written at this timestamp
 250
 251 =item write_timestamps
 252
 253 (arrayref) restrict to parts written at any of these timestamps (note that
 254 holding-disk files have no C<write_timestamp>, so this option and the previous
 255 will omit them)
 256
 257 =item dump_timestamp
 258
 259 restrict to parts with exactly this timestamp
 260
 261 =item dump_timestamps
 262
 263 (arrayref) restrict to parts with any of these timestamps
 264
 265 =item dump_timestamp_match
 266
 267 restrict to parts with timestamps matching this expression
 268
 269 =item holding
 270
 271 if true, only return dumps on holding disk.  If false, omit dumps on holding
 272 disk.
 273
 274 =item hostname
 275
 276 restrict to parts with exactly this hostname
 277
 278 =item hostnames
 279
 280 (arrayref) restrict to parts with any of these hostnames
 281
 282 =item hostname_match
 283
 284 restrict to parts with hostnames matching this expression
 285
 286 =item diskname
 287
 288 restrict to parts with exactly this diskname
 289
 290 =item disknames
 291
 292 (arrayref) restrict to parts with any of these disknames
 293
 294 =item diskname_match
 295
 296 restrict to parts with disknames matching this expression
 297
 298 =item label
 299
 300 restrict to parts with exactly this label
 301
 302 =item labels
 303
 304 (arrayref) restrict to parts with any of these labels
 305
 306 =item level
 307
 308 restrict to parts with exactly this level
 309
 310 =item levels
 311
 312 (arrayref) restrict to parts with any of these levels
 313
 314 =item status
 315
 316 restrict to parts with this status
 317
 318 =item dumpspecs
 319
 320 (arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
 321
 322 =back
 323
 324 Match expressions are described in the amanda(8) manual page.
 325
 326 =item sort_parts([ $key1, $key2, .. ], @parts)
 327
 328 Given a list of parts, this function sorts that list by the requested keys.
 329 The following keys are available:
 330
 331 =over
 332
 333 =item hostname
 334
 335 =item diskname
 336
 337 =item write_timestamp
 338
 339 =item dump_timestamp
 340
 341 =item level
 342
 343 =item filenum
 344
 345 =item label
 346
 347 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
 348
 349 =item partnum
 350
 351 =item nparts
 352
 353 =back
 354
 355 Keys are processed from left to right: if two dumps have the same value for
 356 C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
 357 dash (C<->) to reverse the order.
 358
 359 Note that some of these keys are dump keys; the function will automatically
 360 access those values via the C<dump> attribute.
 361
 362 =back
 363
 364 =head2 DUMPS
 365
 366 =over
 367
 368 =item get_dumps(%parameters)
 369
 370 This function returns a sequence of dumps.  Values in C<%parameters> restrict
 371 the set of dumps that are returned.  The same keys as are used for C<get_parts>
 372 are available here, with the exception of C<label> and C<labels>.  In this
 373 case, the C<status> parameter applies to the dump status, not the status of its
 374 constituent parts.
 375
 376 =item sort_dumps([ $key1, $key2 ], @dumps)
 377
 378 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
 379 The same keys are available, with the exception of C<label>, C<filenum>, and
 380 C<partnum>.
 381
 382 =back
 383
 384 =head2 ADDING DATA
 385
 386 =over
 387
 388 =item add_part($part)
 389
 390 Add the given part to the database.  In terms of logfiles, this will either
 391 create a new logfile (if the part's C<write_timestamp> has not been seen
 392 before) or append to an existing logfile.  Note that a new logfile will require
 393 a corresponding new entry in the tapelist.
 394
 395 Note that no locking is performed: multiple simultaneous calls to this function
 396 can result in a corrupted or incorrect logfile.
 397
 398 TODO: add_dump
 399
 400 =back
 401
 402 =cut
 403
 404 use Amanda::Logfile qw( :constants );
 405 use Amanda::Tapelist;
 406 use Amanda::Config qw( :init :getconf config_dir_relative );
 407 use Amanda::Util qw( quote_string weaken_ref match_disk match_host match_datestamp match_level);
 408 use File::Glob qw( :glob );
 409 use warnings;
 410 use strict;
 411
 # file-scoped tapelist cache; starts out undefined
 my $tapelist;
 414
 # Normalize a timestamp: an 8-character datestamp (YYYYMMDD) is extended
 # with "000000" to the full 14-character form; any other value is returned
 # unchanged.
 sub zeropad {
     my ($timestamp) = @_;
     return (length($timestamp) == 8)
         ? $timestamp . "000000"
         : $timestamp;
 }
 423
 # Return the write timestamps of all logfiles reported by
 # Amanda::Logfile::find_log(), zero-padded to full length and sorted
 # with the default (string) sort.
 sub get_write_timestamps {
     # find_log assumes that the tapelist has been loaded, so load it now
     _load_tapelist();

     # keep only names shaped like log.<stamp>[.<digits>|.amflush] and pull
     # the stamp out of each
     my @stamps = map {
         /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/ ? zeropad($1) : ()
     } Amanda::Logfile::find_log();

     return sort @stamps;
 }
 437
 # Return the most recent write timestamp, optionally restricted to runs of
 # particular types.
 #
 # Parameters (key/value pairs):
 #   type  => a single run type to accept (as returned by get_run_type)
 #   types => arrayref of run types to accept
 # Both may be given; they are combined.
 #
 # Returns the newest matching timestamp, or undef if there is none.
 sub get_latest_write_timestamp {
     my %params = @_;

     # Collect the wanted types in a private list: pushing onto
     # $params{'types'} directly would modify the caller's array.
     my $filter_by_type = exists $params{'types'};
     my @wanted_types = defined $params{'types'} ? @{$params{'types'}} : ();
     if ($params{'type'}) {
         push @wanted_types, $params{'type'};
         $filter_by_type = 1;
     }

     # get all of the timestamps (sorted, so the last one is the newest)
     my @timestamps = get_write_timestamps();

     # if we're not looking for a particular type, then this is easy
     if (@timestamps and !$filter_by_type) {
         return $timestamps[-1];
     }

     # otherwise we need to search backward until we find a logfile of
     # the right type
     while (@timestamps) {
         my $ts = pop @timestamps;
         my $typ = get_run_type($ts);
         if (grep { $_ eq $typ } @wanted_types) {
             return $ts;
         }
     }

     # explicit undef (rather than a bare return) keeps the original
     # single-value result in list context
     return undef;
 }
 467
 # Determine which program produced the run with the given write timestamp
 # by scanning the matching logfiles for their START line.
 #
 # Returns 'amflush', 'amvault', 'amdump', or 'unknown' when no logfile
 # yields a recognizable START line.
 sub get_run_type {
     my ($write_timestamp) = @_;

     # find all of the logfiles with that name; resolve the log directory the
     # same way get_parts_and_dumps does, so a relative logdir setting works
     my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
     my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
     if ($write_timestamp =~ /000000$/) {
         # the timestamp may be a zero-padded datestamp, so also look for
         # logfiles named with the bare 8-digit datestamp
         my $write_datestamp = substr($write_timestamp, 0, 8);
         push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
     }

     for my $lf (@matches) {
         # unreadable logfiles are skipped rather than treated as errors
         open(my $fh, "<", $lf) or next;
         # read into a lexical: while (<$fh>) would overwrite the caller's $_
         while (my $line = <$fh>) {
             # amflush and amvault put their own names in
             return $1 if ($line =~ /^START (amflush|amvault)/);
             # but for amdump we see planner
             return 'amdump' if ($line =~ /^START planner/);
         }
         close($fh);
     }

     return "unknown";
 }
 491
 492
 # This generic function implements the loop of scanning logfiles to find
 # the requested data; get_parts and get_dumps then adjust the results to
 # match what the user expects.
 #
 # Arguments:
 #   $get_what - "parts" or "dumps"; selects which side of the dump<->part
 #               reference loop is weakened, and whether the 'status' and
 #               'label(s)' parameters are applied here
 #   %params   - the filter parameters given to get_parts/get_dumps
 #
 # Returns two arrayrefs: the dumps (in no particular order) and the parts.
 # Dies if a selected logfile cannot be opened or a taper line cannot be
 # parsed.
 sub get_parts_and_dumps {
     my $get_what = shift; # "parts" or "dumps"
     my %params = @_;
     my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));

     # logfile names carry the write timestamp: log.<stamp>, optionally
     # followed by .<digits> or .amflush
     my $logfile_re = qr/^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;

     # find_log assumes that the tapelist has been loaded, so load it now
     _load_tapelist();

     # pre-process params by appending all of the "singular" parameters to the
     # "plurals".  Fresh arrays are built each time: pushing onto the existing
     # arrayrefs would modify arrays owned by the caller.
     $params{'write_timestamps'} =
         [ @{$params{'write_timestamps'} || []}, zeropad($params{'write_timestamp'}) ]
         if exists($params{'write_timestamp'});
     $params{'dump_timestamps'} =
         [ @{$params{'dump_timestamps'} || []}, zeropad($params{'dump_timestamp'}) ]
         if exists($params{'dump_timestamp'});
     for my $singular (qw(hostname diskname level)) {
         next unless exists($params{$singular});
         my $plural = $singular . 's';
         $params{$plural} = [ @{$params{$plural} || []}, $params{$singular} ];
     }
     if ($get_what eq 'parts') {
         $params{'labels'} = [ @{$params{'labels'} || []}, $params{'label'} ]
             if exists($params{'label'});
     } else {
         # labels do not apply to dumps
         delete $params{'labels'};
     }

     # specifying write_timestamps implies we won't check holding files
     if ($params{'write_timestamps'}) {
         if (defined $params{'holding'} and $params{'holding'}) {
             return [], []; # well, that's easy..
         }
         $params{'holding'} = 0;
     }

     # Since we're working from logfiles, we have to pick the logfiles we'll use first.
     # Then we can use search_logfile.
     my @logfiles;
     if ($params{'holding'}) {
         @logfiles = ( 'holding', );
     } elsif (exists($params{'write_timestamps'})) {
         # if we have specific write_timestamps, the job is pretty easy.
         my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
         for my $logfile (Amanda::Logfile::find_log()) {
             next unless (my ($timestamp) = $logfile =~ $logfile_re);
             next unless (exists($timestamps_hash{zeropad($timestamp)}));
             push @logfiles, $logfile;
         }
     } elsif (exists($params{'dump_timestamps'})) {
         # otherwise, we need only look in logfiles at or after the earliest dump timestamp
         my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
         my $earliest_timestamp = $sorted_timestamps[0];
         for my $logfile (Amanda::Logfile::find_log()) {
             next unless (my ($timestamp) = $logfile =~ $logfile_re);
             next unless (zeropad($timestamp) ge $earliest_timestamp);
             push @logfiles, $logfile;
         }
     } else {
         # oh well -- it looks like we'll have to read all existing logfiles.
         @logfiles = Amanda::Logfile::find_log();
     }

     # Set up some hash tables for speedy lookups of various attributes; an
     # empty hash means "no restriction on this attribute"
     my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
     %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
         if (exists($params{'dump_timestamps'}));
     %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
         if (exists($params{'hostnames'}));
     %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
         if (exists($params{'disknames'}));
     %levels_hash = map { ($_, undef) } @{$params{'levels'}}
         if (exists($params{'levels'}));
     %labels_hash = map { ($_, undef) } @{$params{'labels'}}
         if (exists($params{'labels'}));

     my %dumps;   # keyed by host, disk, write timestamp, level, dump timestamp
     my @parts;

     # *also* scan holding if the holding param wasn't specified
     if (!exists $params{'holding'}) {
         push @logfiles, 'holding';
     }

     # now loop over those logfiles and use search_logfile to load the dumpfiles
     # from them, then process each entry from the logfile
     for my $logfile (@logfiles) {
         my (@find_results, $write_timestamp);

         # get the raw contents from search_logfile, or from the holding disk
         # if $logfile is the 'holding' sentinel
         if ($logfile ne 'holding') {
             @find_results = Amanda::Logfile::search_logfile(undef, undef,
                                                         "$logfile_dir/$logfile", 1);
             # convert to dumpfile hashes, including the write_timestamp from the logfile name
             my ($timestamp) = $logfile =~ $logfile_re;
             $write_timestamp = zeropad($timestamp);

         } else {
             @find_results = Amanda::Logfile::search_holding_disk();
             $write_timestamp = '00000000000000';
         }

         # filter against *_match with dumps_match
         @find_results = Amanda::Logfile::dumps_match([@find_results],
             exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
             exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
             exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
             undef,
             0);

         # loop over each entry in the logfile.
         for my $find_result (@find_results) {

             # filter out the non-dump error messages that find.c produces
             next unless (defined $find_result->{'label'});

             # bail out on this result early, if possible
             next if (%dump_timestamps_hash
                 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
             next if (%hostnames_hash
                 and !exists($hostnames_hash{$find_result->{'hostname'}}));
             next if (%disknames_hash
                 and !exists($disknames_hash{$find_result->{'diskname'}}));
             next if (%levels_hash
                 and !exists($levels_hash{$find_result->{'level'}}));
             next if (%labels_hash
                 and !exists($labels_hash{$find_result->{'label'}}));
             if ($get_what eq 'parts') {
                 # for dumps, the status filter is applied by get_dumps
                 next if (exists($params{'status'})
                     and defined $find_result->{'status'}
                     and $find_result->{'status'} ne $params{'status'});
             }

             # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
             # tendency to produce duplicate results
             next if ($params{'dumpspecs'}
                 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
                                                     $params{'dumpspecs'}, 0));

             my $dump_timestamp = zeropad($find_result->{'timestamp'});

             my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
                                      $write_timestamp, $find_result->{'level'}, $dump_timestamp);
             my $dump = $dumps{$dumpkey};
             if (!defined $dump) {
                 $dump = $dumps{$dumpkey} = {
                     dump_timestamp => $dump_timestamp,
                     write_timestamp => $write_timestamp,
                     hostname => $find_result->{'hostname'},
                     diskname => $find_result->{'diskname'},
                     level => $find_result->{'level'}+0,
                     orig_kb => $find_result->{'orig_kb'},
                     status => $find_result->{'dump_status'},
                     message => $find_result->{'message'},
                     # the rest of these params are unknown until we see a taper
                     # DONE, PARTIAL, or FAIL line, although we count nparts
                     # manually instead of relying on the logfile
                     nparts => 0, # $find_result->{'totalparts'}
                     bytes => -1, # $find_result->{'bytes'}
                     kb => -1,    # $find_result->{'kb'}
                     sec => -1,   # $find_result->{'sec'}
                 };
             }

             # start setting up a part hash for this result
             my %part;
             if ($logfile ne 'holding') {
                 # on-media dump
                 %part = (
                     label => $find_result->{'label'},
                     filenum => $find_result->{'filenum'},
                     dump => $dump,
                     status => $find_result->{'status'} || 'FAILED',
                     sec => $find_result->{'sec'},
                     kb => $find_result->{'kb'},
                     orig_kb => $find_result->{'orig_kb'},
                     partnum => $find_result->{'partnum'},
                 );
             } else {
                 # holding disk: the "label" is the holding file's name, and
                 # the file is treated as a single part
                 %part = (
                     holding_file => $find_result->{'label'},
                     dump => $dump,
                     status => $find_result->{'status'} || 'FAILED',
                     sec => 0.0,
                     kb => $find_result->{'kb'},
                     orig_kb => $find_result->{'orig_kb'},
                     partnum => 1,
                 );
                 # and fix up the dump, too
                 $dump->{'status'} = $find_result->{'status'} || 'FAILED';
                 $dump->{'bytes'} = $find_result->{'bytes'};
                 $dump->{'kb'} = $find_result->{'kb'};
                 $dump->{'sec'} = $find_result->{'sec'};
             }

             # weaken the dump ref if we're returning dumps
             weaken_ref($part{'dump'})
                 if ($get_what eq 'dumps');

             # count the number of successful parts in the dump
             $dump->{'nparts'}++ if $part{'status'} eq 'OK';

             # and add a ref to the array of parts; if we're getting
             # parts, then this is a weak ref
             $dump->{'parts'}[$part{'partnum'}] = \%part;
             weaken_ref($dump->{'parts'}[$part{'partnum'}])
                 if ($get_what eq 'parts');

             push @parts, \%part;
         }

         # if these dumps were on the holding disk, then we're done
         next if $logfile eq 'holding';

         # re-read the logfile to extract dump-level info that's not captured by
         # search_logfile
         my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
         die "logfile '$logfile' not found" unless $logh;
         while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
             # only taper's DONE/PARTIAL/FAIL/SUCCESS lines are of interest
             next unless $prog == $P_TAPER;
             my $status;
             if ($type == $L_DONE) {
                 $status = 'OK';
             } elsif ($type == $L_PARTIAL) {
                 $status = 'PARTIAL';
             } elsif ($type == $L_FAIL) {
                 $status = 'FAIL';
             } elsif ($type == $L_SUCCESS) {
                 $status = "OK";
             } else {
                 next;
             }

             # now extract the appropriate info; luckily these log lines have the same
             # format, more or less
             my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
             ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
             ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
             ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
             if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
                 ($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
                 if (substr($str1, 0,1) ne '[') {
                     $str = $str1;
                 } else { # nparts is not in all PARTIAL lines
                     # what we just consumed was really the level, so leave
                     # $str untouched for the level parse below
                     $nparts = 0;
                 }

             } else {
                 $nparts = 0;
             }
             ($level, $str) = Amanda::Util::skip_quoted_string($str);
             if ($status ne 'FAIL') {
                 my $s = $str;
                 my $b_unit;
                 ($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
                     or die("cannot parse taper statistics in logfile '$logfile': '$s'");
                 if ($b_unit eq 'bytes') {
                     # the size was logged in bytes; derive kb from it
                     $bytes = $kb;
                     $kb /= 1024;
                 } else {
                     # only kb was logged, so the byte count is not known
                     $bytes = 0;
                 }
                 # never report a zero or negative duration
                 $secs = 0.1 if ($secs <= 0);
             }
             if ($status ne 'OK') {
                 $message = $str;
             } else {
                 $message = '';
             }

             $hostname = Amanda::Util::unquote_string($hostname);
             $diskname = Amanda::Util::unquote_string($diskname);
             $message = Amanda::Util::unquote_string($message) if $message;

             # filter against dump criteria
             next if ($params{'dump_timestamp_match'}
                 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
             next if (%dump_timestamps_hash
                 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));

             next if ($params{'hostname_match'}
                 and !match_host($params{'hostname_match'}, $hostname));
             next if (%hostnames_hash
                 and !exists($hostnames_hash{$hostname}));

             next if ($params{'diskname_match'}
                 and !match_disk($params{'diskname_match'}, $diskname));
             next if (%disknames_hash
                 and !exists($disknames_hash{$diskname}));

             next if (%levels_hash
                 and !exists($levels_hash{$level}));
             # get_dumps filters on status

             if ($params{'dumpspecs'}) {
                 # the line is kept if it matches at least one dumpspec
                 my $ok = 0;
                 for my $ds (@{$params{'dumpspecs'}}) {
                     # (the "". are for SWIG's benefit - SWIGged functions don't like
                     # strings generated by SWIG.  Long story.)
                     next if (defined $ds->{'host'}
                             and !match_host("".$ds->{'host'}, $hostname));
                     next if (defined $ds->{'disk'}
                             and !match_disk("".$ds->{'disk'}, $diskname));
                     next if (defined $ds->{'datestamp'}
                             and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
                     next if (defined $ds->{'level'}
                             and !match_level("".$ds->{'level'}, $level));
                     next if (defined $ds->{'write_timestamp'}
                              and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
                     $ok = 1;
                     last;
                 }
                 next unless $ok;
             }

             my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
                                      $level, zeropad($dump_timestamp));
             my $dump = $dumps{$dumpkey};
             if (!defined $dump) {
                 # this will happen when a dump has no parts - a FAILed dump.
                 $dump = $dumps{$dumpkey} = {
                     dump_timestamp => zeropad($dump_timestamp),
                     write_timestamp => $write_timestamp,
                     hostname => $hostname,
                     diskname => $diskname,
                     level => $level+0,
                     orig_kb => undef,
                     status => "FAILED",
                     # message set below
                     nparts => $nparts, # hopefully 0?
                     # kb set below
                     # sec set below
                 };
             }

             $dump->{'message'} = $message;
             if ($status eq 'FAIL') {
                 $dump->{'bytes'} = 0;
                 $dump->{'kb'} = 0;
                 $dump->{'sec'} = 0.0;
             } else {
                 $dump->{'bytes'} = $bytes+0;
                 $dump->{'kb'} = $kb+0;
                 $dump->{'sec'} = $secs+0.0;
             }
         }
         Amanda::Logfile::close_logfile($logh);
     }

     return [ values %dumps], \@parts;
 }
 847
 848 sub get_parts {
 849     my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
 850     return @$parts;
 851 }
 852
 853 sub get_dumps {
 854     my %params = @_;
 855     my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
 856     my @dumps = @$dumps;
 857
 858     if (exists $params{'status'}) {
 859         @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
 860     }
 861
 862     return @dumps;
 863 }
 864
 865 sub sort_parts {
 866     my ($keys, @parts) = @_;
 867
 868     # TODO: make this more efficient by selecting the comparison
 869     # functions once, in advance, and just applying them
 870     return sort {
 871         my $res;
 872         for my $key (@$keys) {
 873             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 874
 875             if ($k =~ /^(partnum|filenum)$/) {
 876                 # compare part components numerically
 877                 $res = $a->{$k} <=> $b->{$k};
 878             } elsif ($k =~ /^(nparts|level)$/) {
 879                 # compare dump components numerically
 880                 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
 881             } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
 882                 # compare dump components alphabetically
 883                 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
 884             } else { # (label)
 885                 # compare part components alphabetically
 886                 $res = $a->{$k} cmp $b->{$k};
 887             }
 888             $res = -$res if ($rev eq '-' and $res);
 889             return $res if $res;
 890         }
 891         return 0;
 892     } @parts;
 893 }
 894
 895 sub sort_dumps {
 896     my ($keys, @dumps) = @_;
 897
 898     # TODO: make this more efficient by selecting the comparison
 899     # functions once, in advance, and just applying them
 900     return sort {
 901         my $res;
 902         for my $key (@$keys) {
 903             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 904
 905             if ($k =~ /^(nparts|level|filenum)$/) {
 906                 # compare dump components numerically
 907                 $res = $a->{$k} <=> $b->{$k};
 908             } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
 909                 # compare dump components alphabetically
 910                 $res = $a->{$k} cmp $b->{$k};
 911             }
 912             $res = -$res if ($rev eq '-' and $res);
 913             return $res if $res;
 914         }
 915         return 0;
 916     } @dumps;
 917 }
 918
 919 # caches for add_part() to avoid repeatedly looking up the log
 920 # filename for a particular write_timestamp.
 921 my $add_part_last_label = undef;
 922 my $add_part_last_write_timestamp = undef;
 923 my $add_part_last_logfile = undef;
 924
 925 sub add_part {
 926     my ($dump) = @_;
 927     my $found;
 928     my $logfh;
 929     my $logfile;
 930     my $find_result;
 931     my $logdir = getconf($CNF_LOGDIR);
 932     my ($last_filenum, $last_secs, $last_kbs);
 933
 934     # first order of business is to find out whether we need to make a new
 935     # dumpfile for this.
 936     my $write_timestamp = zeropad($dump->{'write_timestamp'});
 937     die "dump has no 'write_timestamp'" unless defined $write_timestamp;
 938
 939     # consult our one-element cache for this label and write_timestamp
 940     if (!defined $add_part_last_label
 941         or $add_part_last_label ne $dump->{'label'}
 942         or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
 943
 944         # update the cache
 945         $add_part_last_logfile = undef;
 946         LOGFILE:
 947         for my $lf (Amanda::Logfile::find_log()) {
 948             next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 949             next unless (zeropad($log_timestamp) eq $write_timestamp);
 950
 951             # write timestamp matches; now check the label
 952             LOGFILE_DUMP:
 953             for $find_result (Amanda::Logfile::search_logfile(undef, undef,
 954                                         "$logdir/$lf", 1)) {
 955                 next unless (defined $find_result->{'label'});
 956
 957                 if ($find_result->{'label'} eq $dump->{'label'}) {
 958                     $add_part_last_label = $dump->{'label'};
 959                     $add_part_last_write_timestamp = $dump->{'write_timestamp'};
 960                     $add_part_last_logfile = $lf;
 961                     last LOGFILE;
 962                 }
 963             }
 964         }
 965     }
 966     $logfile = $add_part_last_logfile;
 967
 968     # truncate the write_timestamp if we're not using timestamps
 969     if (!getconf($CNF_USETIMESTAMPS)) {
 970         $write_timestamp = substr($write_timestamp, 0, 8);
 971     }
 972
 973     # get the information on the last dump and part in this logfile, or create
 974     # a new logfile if none exists, then open the logfile for writing.
 975     if (defined $logfile) {
 976         $last_filenum = -1;
 977
 978         # NOTE: this depends on an implementation detail of search_logfile: it
 979         # returns the results in the reverse order of appearance in the logfile.
 980         # Since we're concerned with the last elements of this logfile that we
 981         # will be appending to shortly, we simply reverse this list.  As this
 982         # package is rewritten to parse logfiles on its own (or access a relational
 983         # database), this implementation detail will no longer be relevant.
 984         my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
 985                                                     "$logdir/$logfile", 1);
 986         for $find_result (@find_results) {
 987             # filter out the non-dump error messages that find.c produces
 988             next unless (defined $find_result->{'label'});
 989
 990             $last_filenum = $find_result->{'filenum'};
 991
 992             # if this is part number 1, reset our secs and kbs counters on the
 993             # assumption that this is the beginning of a new dump
 994             if ($find_result->{'partnum'} == 1) {
 995                 $last_secs = $last_kbs = 0;
 996             }
 997             $last_secs += $find_result->{'sec'};
 998             $last_kbs += $find_result->{'kb'};
 999         }
1000
1001         open($logfh, ">>", "$logdir/$logfile");
1002     } else {
1003         $last_filenum = -1;
1004         $last_secs = 0;
1005         $last_kbs = 0;
1006
1007         # pick an unused log filename
1008         my $i = 0;
1009         while (1) {
1010             $logfile = "log.$write_timestamp.$i";
1011             last unless -f "$logdir/$logfile";
1012             $i++;
1013         }
1014
1015         open($logfh, ">", "$logdir/$logfile")
1016             or die("Could not write '$logdir/$logfile': $!");
1017
1018         print $logfh
1019             "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1020
1021         print $logfh
1022             "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1023
1024         if (!defined $tapelist) {
1025             _load_tapelist();
1026         } else {
1027             # reload the tapelist immediately, in case it's been modified
1028             $tapelist->reload();
1029         }
1030
1031         # see if we need to add an entry to the tapelist for this dump
1032         if (!grep { $_->{'label'} eq $dump->{'label'}
1033                     and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1034                 } @{$tapelist->{tles}}) {
1035             $tapelist->reload(1);
1036             $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
1037             $tapelist->write();
1038         }
1039     }
1040
1041     if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1042         warn "Discontinuity in filenums in $logfile: " .
1043              "from $last_filenum to $dump->{filenum}";
1044     }
1045
1046     my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
1047
1048     my $part_line = "PART taper ";
1049     $part_line .= "$dump->{label} ";
1050     $part_line .= "$dump->{filenum} ";
1051     $part_line .= quote_string($dump->{hostname}) . " ";
1052     $part_line .= quote_string($dump->{diskname}) . " ";
1053     $part_line .= "$dump->{dump_timestamp} ";
1054     $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1055     $part_line .= "$dump->{level} ";
1056     $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1057     print $logfh "$part_line\n";
1058
1059     # TODO: we don't always know nparts when writing a part, so
1060     # this is not always an effective way to detect a complete dump.
1061     # However, it works for purposes of data vaulting.
1062     if ($dump->{'partnum'} == $dump->{'nparts'}) {
1063         my $secs = $last_secs + $dump->{'sec'};
1064         my $kbs = $last_kbs + $dump->{'kb'};
1065         $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
1066
1067         my $done_line = "DONE taper ";
1068         $done_line .= quote_string($dump->{hostname}) ." ";
1069         $done_line .= quote_string($dump->{diskname}) ." ";
1070         $done_line .= "$dump->{dump_timestamp} ";
1071         $done_line .= "$dump->{nparts} ";
1072         $done_line .= "$dump->{level} ";
1073         $done_line .= "[sec $secs kb $kbs kps $kps]";
1074         print $logfh "$done_line\n";
1075     }
1076
1077     close($logfh);
1078 }
1079
# Populate the file-level $tapelist on first use; does nothing when it has
# already been loaded.  The tapelist path is the CNF_TAPELIST setting passed
# through config_dir_relative().
sub _load_tapelist {
    if (!defined $tapelist) {
        $tapelist = Amanda::Tapelist->new(
            config_dir_relative(getconf($CNF_TAPELIST))
        );
    }
}
1086
# Drop the cached tapelist object so that the next _load_tapelist() call
# builds a fresh one.  (used by installcheck)
sub _clear_cache {
    undef $tapelist;
}
1090
1091 1;