git.gag.com Git - debian/amanda/blob - perl/Amanda/DB/Catalog.pm

   1 # Copyright (c) 2008, 2009, 2010 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License version 2 as published
   5 # by the Free Software Foundation.
   6 #
   7 # This program is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  10 # for more details.
  11 #
  12 # You should have received a copy of the GNU General Public License along
  13 # with this program; if not, write to the Free Software Foundation, Inc.,
  14 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  15 #
  16 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
  17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  18
  19 package Amanda::DB::Catalog;
  20
  21 =head1 NAME
  22
  23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
  24
  25 =head1 SYNOPSIS
  26
  27   use Amanda::DB::Catalog;
  28
  29   # get all dump timestamps on record
  30   my @timestamps = Amanda::DB::Catalog::get_timestamps();
  31
  32   # loop over those timestamps, printing dump info for each one
  33   for my $timestamp (@timestamps) {
  34       my @dumpfiles = Amanda::DB::Catalog::get_parts(
  35           timestamp => $timestamp,
  36           ok => 1
  37       );
  38       print "$timstamp:\n";
  39       for my $dumpfile (@dumpfiles) {
  40           print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
  41                 " level ", $dumpfile->{level}, "\n";
  42       }
  43   }
  44
  45 =head1 MODEL
  46
  47 The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
  48 a complete bytestream received from an application, and is uniquely identified
  49 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
  50 and C<write_timestamp>.  A dump may be partial, or even a complete failure.
  51
  52 A part corresponds to a single file on a volume, containing a portion of the
  53 data for a dump.  A part, then, is completely specified by a volume label and a
  54 file number (C<filenum>).  Each part has, among other things, a part number
  55 (C<partnum>) which gives its relative position within the dump.  The bytestream
  56 for a dump is recovered by concatenating all of the successful (C<status> = OK)
  57 parts matching the dump.
  58
  59 Files in the holding disk are considered part of the catalog, and are
  60 represented as single-part dumps (holding-disk chunking is ignored, as it is
  61 distinct from split parts).
  62
  63 =head2 DUMPS
  64
  65 The dump table contains one row per dump.  It has the following columns:
  66
  67 =over
  68
  69 =item dump_timestamp
  70
  71 (string) -- timestamp of the run in which the dump was created
  72
  73 =item write_timestamp
  74
  75 (string) -- timestamp of the run in which the part was written to this volume,
  76 or C<"00000000000000"> for dumps in the holding disk.
  77
  78 =item hostname
  79
  80 (string) -- dump hostname
  81
  82 =item diskname
  83
  84 (string) -- dump diskname
  85
  86 =item level
  87
  88 (integer) -- dump level
  89
  90 =item status
  91
  92 (string) -- The status of the dump - "OK", "PARTIAL", or "FAIL".  If a disk
  93 failed to dump at all, then it is not part of the catalog and thus will not
  94 have an associated dump row.
  95
  96 =item message
  97
  98 (string) -- reason for PARTIAL or FAIL status
  99
 100 =item nparts
 101
 102 (integer) -- number of successful parts in this dump
 103
 104 =item kb
 105
 106 (integer) -- size (in kb) of the dump on disk
 107
 108 =item orig_kb
 109
 110 (integer) -- size (in kb) of the complete dump (before compression or encryption); undef
 111 if not available
 112
 113 =item sec
 114
 115 (integer) -- time (in seconds) spent writing this part
 116
 117 =item parts
 118
 119 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
 120 always C<undef>).  When multiple partial parts are available, the choice of the
 121 partial that is included in this array is undefined.
 122
 123 =back
 124
 125 A dump is represented as a hashref with these keys.
 126
 127 The C<write_timestamp> gives the time of the amanda run in which the part was
 128 written to this volume.  The C<write_timestamp> may differ from the
 129 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
 130 initial dump.
 131
 132 =head2 PARTS
 133
 134 The parts table contains one row per part, and has the following columns:
 135
 136 =over
 137
 138 =item label
 139
 140 (string) -- volume label (not present for holding files)
 141
 142 =item filenum
 143
 144 (integer) -- file on that volume (not present for holding files)
 145
 146 =item holding_file
 147
 148 (string) -- fully-qualified pathname of the holding file (not present for
 149 on-media dumps)
 150
 151 =item dump
 152
 153 (object ref) -- a reference to the dump containing this part
 154
 155 =item status
 156
 157 (string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
 158
 159 =item partnum
 160
 161 (integer) -- part number of a split part (1-based)
 162
 163 =item kb
 164
 165 (integer) -- size (in kb) of this part
 166
 167 =item sec
 168
 169 (integer) -- time (in seconds) spent writing this part
 170
 171 =back
 172
 173 A part is represented as a hashref with these keys.  The C<label> and
 174 C<filenum> serve as a primary key.
 175
 176 Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
 177 broken by making the C<parts> array's contents weak references in C<get_dumps>,
 178 and the C<dump> reference weak in C<get_parts>.
 179
 180 =head2 NOTES
 181
 182 All timestamps used in this module are full-length, in the format
 183 C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
 184 zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
 185 corresponds to the initiation of the I<original> dump run, while
 186 C<write_timestamp> gives the time the file was written to the volume.  When
 187 parts are migrated from volume to volume (e.g., by I<amvault>), the
 188 C<dump_timestamp> does not change.
 189
 190 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
 191 serves as a unique identifier for a dump bytestream, but because the bytestream
 192 may appear several times in the catalog (due to vaulting) the additional
 193 C<write_timestamp> is required to identify a particular on-storage instance of
 194 a dump.  Note that the part sizes may differ between instances, so it is not
 195 valid to concatenate parts from different dump instances.
 196
 197 =head1 INTERFACES
 198
 199 =head2 SUMMARY DATA
 200
 201 The following functions provide summary data based on the contents of the
 202 catalog.
 203
 204 =over
 205
 206 =item get_write_timestamps()
 207
 208 Get a list of all write timestamps, sorted in chronological order.
 209
 210 =item get_latest_write_timestamp()
 211
 212 Return the most recent write timestamp.
 213
 214 =item get_latest_write_timestamp(type => 'amvault')
 215 =item get_latest_write_timestamp(types => [ 'amvault', .. ])
 216
 217 Return the timestamp of the most recent dump of the given type or types.  The
 218 available types are given below for C<get_run_type>.
 219
 220 =item get_labels_written_at_timestamp($ts)
 221
 222 Return a list of labels for volumes written at the given timestamp.
 223
 224 =item get_run_type($ts)
 225
 226 Return the type of run made at the given timestamp.  The result is one of
 227 C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
 228
 229 =back
 230
 231 =head2 PARTS
 232
 233 =over
 234
 235 =item get_parts(%parameters)
 236
 237 This function returns a sequence of parts.  Values in C<%parameters> restrict
 238 the set of parts that are returned.  The hash can have any of the following
 239 keys:
 240
 241 =over
 242
 243 =item write_timestamp
 244
 245 restrict to parts written at this timestamp
 246
 247 =item write_timestamps
 248
 249 (arrayref) restrict to parts written at any of these timestamps (note that
 250 holding-disk files have no C<write_timestamp>, so this option and the previous
 251 will omit them)
 252
 253 =item dump_timestamp
 254
 255 restrict to parts with exactly this timestamp
 256
 257 =item dump_timestamps
 258
 259 (arrayref) restrict to parts with any of these timestamps
 260
 261 =item dump_timestamp_match
 262
 263 restrict to parts with timestamps matching this expression
 264
 265 =item holding
 266
 267 if true, only return dumps on holding disk.  If false, omit dumps on holding
 268 disk.
 269
 270 =item hostname
 271
 272 restrict to parts with exactly this hostname
 273
 274 =item hostnames
 275
 276 (arrayref) restrict to parts with any of these hostnames
 277
 278 =item hostname_match
 279
 280 restrict to parts with hostnames matching this expression
 281
 282 =item diskname
 283
 284 restrict to parts with exactly this diskname
 285
 286 =item disknames
 287
 288 (arrayref) restrict to parts with any of these disknames
 289
 290 =item diskname_match
 291
 292 restrict to parts with disknames matching this expression
 293
 294 =item label
 295
 296 restrict to parts with exactly this label
 297
 298 =item labels
 299
 300 (arrayref) restrict to parts with any of these labels
 301
 302 =item level
 303
 304 restrict to parts with exactly this level
 305
 306 =item levels
 307
 308 (arrayref) restrict to parts with any of these levels
 309
 310 =item status
 311
 312 restrict to parts with this status
 313
 314 =item dumpspecs
 315
 316 (arrayref of dumpspecs) restruct to parts matching one or more of these dumpspecs
 317
 318 =back
 319
 320 Match expressions are described in the amanda(8) manual page.
 321
 322 =item sort_parts([ $key1, $key2, .. ], @parts)
 323
 324 Given a list of parts, this function sorts that list by the requested keys.
 325 The following keys are available:
 326
 327 =over
 328
 329 =item hostname
 330
 331 =item diskname
 332
 333 =item write_timestamp
 334
 335 =item dump_timestamp
 336
 337 =item level
 338
 339 =item filenum
 340
 341 =item label
 342
 343 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
 344
 345 =item partnum
 346
 347 =item nparts
 348
 349 =back
 350
 351 Keys are processed from left to right: if two dumps have the same value for
 352 C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
 353 dash (C<->) to reverse the order.
 354
 355 Note that some of these keys are dump keys; the function will automatically
 356 access those values via the C<dump> attribute.
 357
 358 =back
 359
 360 =head2 DUMPS
 361
 362 =over
 363
 364 =item get_dumps(%parameters)
 365
 366 This function returns a sequence of dumps.  Values in C<%parameters> restrict
 367 the set of dumps that are returned.  The same keys as are used for C<get_parts>
 368 are available here, with the exception of C<label> and C<labels>.  In this
 369 case, the C<status> parameter applies to the dump status, not the status of its
 370 constituent parts.
 371
 372 =item sort_dumps([ $key1, $key2 ], @dumps)
 373
 374 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
 375 The same keys are available, with the exception of C<label>, C<filenum>, and
 376 C<partnum>.
 377
 378 =back
 379
 380 =head2 ADDING DATA
 381
 382 =over
 383
 384 =item add_part($part)
 385
 386 Add the given part to the database.  In terms of logfiles, this will either
 387 create a new logfile (if the part's C<write_timestamp> has not been seen
 388 before) or append to an existing logfile.  Note that a new logfile will require
 389 a corresponding new entry in the tapelist.
 390
 391 Note that no locking is performed: multiple simultaneous calls to this function
 392 can result in a corrupted or incorrect logfile.
 393
 394 TODO: add_dump
 395
 396 =back
 397
 398 =cut
 399
 400 use Amanda::Logfile qw( :constants match_disk match_host
 401                         match_datestamp match_level );
 402 use Amanda::Tapelist;
 403 use Amanda::Config qw( :init :getconf config_dir_relative );
 404 use Amanda::Util qw( quote_string weaken_ref );
 405 use File::Glob qw( :glob );
 406 use warnings;
 407 use strict;
 408
 409 # tapelist cache
 410 my $tapelist = undef;
 411
 412 # utility function
 413 sub zeropad {
 414     my ($timestamp) = @_;
 415     if (length($timestamp) == 8) {
 416         return $timestamp."000000";
 417     }
 418     return $timestamp;
 419 }
 420
 421 sub get_write_timestamps {
 422     my @rv;
 423
 424     # find_log assumes that the tapelist has been loaded, so load it now
 425     _load_tapelist();
 426
 427     for (Amanda::Logfile::find_log()) {
 428         next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 429         push @rv, zeropad($timestamp);
 430     }
 431
 432     return sort @rv;
 433 }
 434
 435 sub get_latest_write_timestamp {
 436     my %params = @_;
 437
 438     if ($params{'type'}) {
 439         push @{$params{'types'}}, $params{'type'};
 440     }
 441
 442     # get all of the timestamps and select the last one
 443     my @timestamps = get_write_timestamps();
 444
 445     if (@timestamps) {
 446         # if we're not looking for a particular type, then this is easy
 447         if (!exists $params{'types'}) {
 448             return $timestamps[-1];
 449         }
 450
 451         # otherwise we need to search backward until we find a logfile of
 452         # the right type
 453         while (@timestamps) {
 454             my $ts = pop @timestamps;
 455             my $typ = get_run_type($ts);
 456             if (grep { $_ eq $typ } @{$params{'types'}}) {
 457                 return $ts;
 458             }
 459         }
 460     }
 461
 462     return undef;
 463 }
 464
 465 sub get_run_type {
 466     my ($write_timestamp) = @_;
 467
 468     # find all of the logfiles with that name
 469     my $logdir = getconf($CNF_LOGDIR);
 470     my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
 471     if ($write_timestamp =~ /000000$/) {
 472         my $write_datestamp = substr($write_timestamp, 0, 8);
 473         push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
 474     }
 475
 476     for my $lf (@matches) {
 477         open(my $fh, "<", $lf) or next;
 478         while (<$fh>) {
 479             # amflush and amvault put their own names in
 480             return $1 if (/^START (amflush|amvault)/);
 481             # but for amdump we see planner
 482             return 'amdump' if (/^START planner/);
 483         }
 484     }
 485
 486     return "unknown";
 487 }
 488
 489
 490 # this generic function implements the loop of scanning logfiles to find
 491 # the requested data; get_parts and get_dumps then adjust the results to
 492 # match what the user expects.
 493 sub get_parts_and_dumps {
 494     my $get_what = shift; # "parts" or "dumps"
 495     my %params = @_;
 496     my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
 497
 498     # find_log assumes that the tapelist has been loaded, so load it now
 499     _load_tapelist();
 500
 501     # pre-process params by appending all of the "singular" parameters to the "plurals"
 502     push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
 503         if exists($params{'write_timestamp'});
 504     push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
 505         if exists($params{'dump_timestamp'});
 506     push @{$params{'hostnames'}}, $params{'hostname'}
 507         if exists($params{'hostname'});
 508     push @{$params{'disknames'}}, $params{'diskname'}
 509         if exists($params{'diskname'});
 510     push @{$params{'levels'}}, $params{'level'}
 511         if exists($params{'level'});
 512     if ($get_what eq 'parts') {
 513         push @{$params{'labels'}}, $params{'label'}
 514             if exists($params{'label'});
 515     } else {
 516         delete $params{'labels'};
 517     }
 518
 519     # specifying write_timestamps implies we won't check holding files
 520     if ($params{'write_timestamps'}) {
 521         if (defined $params{'holding'} and $params{'holding'}) {
 522             return [], []; # well, that's easy..
 523         }
 524         $params{'holding'} = 0;
 525     }
 526
 527     # Since we're working from logfiles, we have to pick the logfiles we'll use first.
 528     # Then we can use search_logfile.
 529     my @logfiles;
 530     if ($params{'holding'}) {
 531         @logfiles = ( 'holding', );
 532     } elsif (exists($params{'write_timestamps'})) {
 533         # if we have specific write_timestamps, the job is pretty easy.
 534         my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
 535         for my $logfile (Amanda::Logfile::find_log()) {
 536             next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 537             next unless (exists($timestamps_hash{zeropad($timestamp)}));
 538             push @logfiles, $logfile;
 539         }
 540     } elsif (exists($params{'dump_timestamps'})) {
 541         # otherwise, we need only look in logfiles at or after the earliest dump timestamp
 542         my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
 543         my $earliest_timestamp = $sorted_timestamps[0];
 544         for my $logfile (Amanda::Logfile::find_log()) {
 545             next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 546             next unless (zeropad($timestamp) ge $earliest_timestamp);
 547             push @logfiles, $logfile;
 548         }
 549     } else {
 550         # oh well -- it looks like we'll have to read all existing logfiles.
 551         @logfiles = Amanda::Logfile::find_log();
 552     }
 553
 554     # Set up some hash tables for speedy lookups of various attributes
 555     my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
 556     %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
 557         if (exists($params{'dump_timestamps'}));
 558     %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
 559         if (exists($params{'hostnames'}));
 560     %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
 561         if (exists($params{'disknames'}));
 562     %levels_hash = map { ($_, undef) } @{$params{'levels'}}
 563         if (exists($params{'levels'}));
 564     %labels_hash = map { ($_, undef) } @{$params{'labels'}}
 565         if (exists($params{'labels'}));
 566
 567     my %dumps;
 568     my @parts;
 569
 570     # *also* scan holding if the holding param wasn't specified
 571     if (!exists $params{'holding'}) {
 572         push @logfiles, 'holding';
 573     }
 574
 575     # now loop over those logfiles and use search_logfile to load the dumpfiles
 576     # from them, then process each entry from the logfile
 577     for my $logfile (@logfiles) {
 578         my (@find_results, $write_timestamp);
 579
 580         # get the raw contents from search_logfile, or use holding if
 581         # $logfile is undef
 582         if ($logfile ne 'holding') {
 583             @find_results = Amanda::Logfile::search_logfile(undef, undef,
 584                                                         "$logfile_dir/$logfile", 1);
 585             # convert to dumpfile hashes, including the write_timestamp from the logfile name
 586             my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
 587             $write_timestamp = zeropad($timestamp);
 588
 589         } else {
 590             @find_results = Amanda::Logfile::search_holding_disk();
 591             $write_timestamp = '00000000000000';
 592         }
 593
 594         # filter against *_match with dumps_match
 595         @find_results = Amanda::Logfile::dumps_match([@find_results],
 596             exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
 597             exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
 598             exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
 599             undef,
 600             0);
 601
 602         # loop over each entry in the logfile.
 603         for my $find_result (@find_results) {
 604
 605             # filter out the non-dump error messages that find.c produces
 606             next unless (defined $find_result->{'label'});
 607
 608             # bail out on this result early, if possible
 609             next if (%dump_timestamps_hash
 610                 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
 611             next if (%hostnames_hash
 612                 and !exists($hostnames_hash{$find_result->{'hostname'}}));
 613             next if (%disknames_hash
 614                 and !exists($disknames_hash{$find_result->{'diskname'}}));
 615             next if (%levels_hash
 616                 and !exists($levels_hash{$find_result->{'level'}}));
 617             next if (%labels_hash
 618                 and !exists($labels_hash{$find_result->{'label'}}));
 619             if ($get_what eq 'parts') {
 620                 next if (exists($params{'status'})
 621                     and defined $find_result->{'status'}
 622                     and $find_result->{'status'} ne $params{'status'});
 623             }
 624
 625             # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
 626             # tendency to produce duplicate results
 627             next if ($params{'dumpspecs'}
 628                 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
 629                                                     $params{'dumpspecs'}, 0));
 630
 631             my $dump_timestamp = zeropad($find_result->{'timestamp'});
 632
 633             my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
 634                                      $write_timestamp, $find_result->{'level'}, $dump_timestamp);
 635             my $dump = $dumps{$dumpkey};
 636             if (!defined $dump) {
 637                 $dump = $dumps{$dumpkey} = {
 638                     dump_timestamp => $dump_timestamp,
 639                     write_timestamp => $write_timestamp,
 640                     hostname => $find_result->{'hostname'},
 641                     diskname => $find_result->{'diskname'},
 642                     level => $find_result->{'level'}+0,
 643                     orig_kb => $find_result->{'orig_kb'},
 644                     status => $find_result->{'dump_status'},
 645                     message => $find_result->{'message'},
 646                     # the rest of these params are unknown until we see a taper
 647                     # DONE, PARTIAL, or FAIL line, although we count nparts
 648                     # manually instead of relying on the logfile
 649                     nparts => 0,
 650                     kb => -1,
 651                     sec => -1,
 652                 };
 653             }
 654
 655             # start setting up a part hash for this result
 656             my %part;
 657             if ($logfile ne 'holding') {
 658                 # on-media dump
 659                 %part = (
 660                     label => $find_result->{'label'},
 661                     filenum => $find_result->{'filenum'},
 662                     dump => $dump,
 663                     status => $find_result->{'status'} || 'FAILED',
 664                     sec => $find_result->{'sec'},
 665                     kb => $find_result->{'kb'},
 666                     orig_kb => $find_result->{'orig_kb'},
 667                     partnum => $find_result->{'partnum'},
 668                 );
 669             } else {
 670                 # holding disk
 671                 %part = (
 672                     holding_file => $find_result->{'label'},
 673                     dump => $dump,
 674                     status => $find_result->{'status'} || 'FAILED',
 675                     sec => 0.0,
 676                     kb => $find_result->{'kb'},
 677                     orig_kb => $find_result->{'orig_kb'},
 678                     partnum => 1,
 679                 );
 680                 # and fix up the dump, too
 681                 $dump->{'status'} = $find_result->{'status'} || 'FAILED';
 682                 $dump->{'kb'} = $find_result->{'kb'};
 683                 $dump->{'sec'} = $find_result->{'sec'};
 684             }
 685
 686             # weaken the dump ref if we're returning dumps
 687             weaken_ref($part{'dump'})
 688                 if ($get_what eq 'dumps');
 689
 690             # count the number of successful parts in the dump
 691             $dump->{'nparts'}++ if $part{'status'} eq 'OK';
 692
 693             # and add a ref to the array of parts; if we're getting
 694             # parts, then this is a weak ref
 695             $dump->{'parts'}[$part{'partnum'}] = \%part;
 696             weaken_ref($dump->{'parts'}[$part{'partnum'}])
 697                 if ($get_what eq 'parts');
 698
 699             push @parts, \%part;
 700         }
 701
 702         # if these dumps were on the holding disk, then we're done
 703         next if $logfile eq 'holding';
 704
 705         # re-read the logfile to extract dump-level info that's not captured by
 706         # search_logfile
 707         my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
 708         die "logfile '$logfile' not found" unless $logh;
 709         while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
 710             next unless $prog == $P_TAPER;
 711             my $status;
 712             if ($type == $L_DONE) {
 713                 $status = 'OK';
 714             } elsif ($type == $L_PARTIAL) {
 715                 $status = 'PARTIAL';
 716             } elsif ($type == $L_FAIL) {
 717                 $status = 'FAIL';
 718             } elsif ($type == $L_SUCCESS) {
 719                 $status = "OK";
 720             } else {
 721                 next;
 722             }
 723
 724             # now extract the appropriate info; luckily these log lines have the same
 725             # format, more or less
 726             my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $message);
 727             ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
 728             ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
 729             ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
 730             if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
 731                 ($nparts, $str) = Amanda::Util::skip_quoted_string($str);
 732             } else {
 733                 $nparts = 0;
 734             }
 735             ($level, $str) = Amanda::Util::skip_quoted_string($str);
 736             if ($status ne 'FAIL') {
 737                 my $s = $str;
 738                 ($secs, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) kb (\d+).*\] ?(.*)$/)
 739                     or die("'$s'");
 740                 $secs = 0.1 if ($secs <= 0);
 741             }
 742             if ($status ne 'OK') {
 743                 $message = $str;
 744             } else {
 745                 $message = '';
 746             }
 747
 748             $hostname = Amanda::Util::unquote_string($hostname);
 749             $diskname = Amanda::Util::unquote_string($diskname);
 750             $message = Amanda::Util::unquote_string($message) if $message;
 751
 752             # filter against dump criteria
 753             next if ($params{'dump_timestamp_match'}
 754                 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
 755             next if (%dump_timestamps_hash
 756                 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
 757
 758             next if ($params{'hostname_match'}
 759                 and !match_host($params{'hostname_match'}, $hostname));
 760             next if (%hostnames_hash
 761                 and !exists($hostnames_hash{$hostname}));
 762
 763             next if ($params{'diskname_match'}
 764                 and !match_disk($params{'diskname_match'}, $diskname));
 765             next if (%disknames_hash
 766                 and !exists($disknames_hash{$diskname}));
 767
 768             next if (%levels_hash
 769                 and !exists($levels_hash{$level}));
 770             # get_dumps filters on status
 771
 772             if ($params{'dumpspecs'}) {
 773                 my $ok = 0;
 774                 for my $ds (@{$params{'dumpspecs'}}) {
 775                     # (the "". are for SWIG's benefit - SWIGged functions don't like
 776                     # strings generated by SWIG.  Long story.)
 777                     next if (defined $ds->{'host'}
 778                             and !match_host("".$ds->{'host'}, $hostname));
 779                     next if (defined $ds->{'disk'}
 780                             and !match_disk("".$ds->{'disk'}, $diskname));
 781                     next if (defined $ds->{'datestamp'}
 782                             and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
 783                     next if (defined $ds->{'level'}
 784                             and !match_level("".$ds->{'level'}, $level));
 785                     next if (defined $ds->{'write_timestamp'}
 786                              and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
 787                     $ok = 1;
 788                     last;
 789                 }
 790                 next unless $ok;
 791             }
 792
 793             my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
 794                                      $level, zeropad($dump_timestamp));
 795             my $dump = $dumps{$dumpkey};
 796             if (!defined $dump) {
 797                 # this will happen when a dump has no parts - a FAILed dump.
 798                 $dump = $dumps{$dumpkey} = {
 799                     dump_timestamp => zeropad($dump_timestamp),
 800                     write_timestamp => $write_timestamp,
 801                     hostname => $hostname,
 802                     diskname => $diskname,
 803                     level => $level+0,
 804                     orig_kb => undef,
 805                     status => "FAILED",
 806                     # message set below
 807                     nparts => $nparts, # hopefully 0?
 808                     # kb set below
 809                     # sec set below
 810                 };
 811             }
 812
 813             $dump->{'message'} = $message;
 814             if ($status eq 'FAIL') {
 815                 $dump->{'kb'} = 0;
 816                 $dump->{'sec'} = 0.0;
 817             } else {
 818                 $dump->{'kb'} = $kb+0;
 819                 $dump->{'sec'} = $secs+0.0;
 820             }
 821         }
 822         Amanda::Logfile::close_logfile($logh);
 823     }
 824
 825     return [ values %dumps], \@parts;
 826 }
 827
 828 sub get_parts {
 829     my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
 830     return @$parts;
 831 }
 832
 833 sub get_dumps {
 834     my %params = @_;
 835     my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
 836     my @dumps = @$dumps;
 837
 838     if (exists $params{'status'}) {
 839         @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
 840     }
 841
 842     return @dumps;
 843 }
 844
 845 sub sort_parts {
 846     my ($keys, @parts) = @_;
 847
 848     # TODO: make this more efficient by selecting the comparison
 849     # functions once, in advance, and just applying them
 850     return sort {
 851         my $res;
 852         for my $key (@$keys) {
 853             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 854
 855             if ($k =~ /^(partnum|filenum)$/) {
 856                 # compare part components numerically
 857                 $res = $a->{$k} <=> $b->{$k};
 858             } elsif ($k =~ /^(nparts|level)$/) {
 859                 # compare dump components numerically
 860                 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
 861             } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
 862                 # compare dump components alphabetically
 863                 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
 864             } else { # (label)
 865                 # compare part components alphabetically
 866                 $res = $a->{$k} cmp $b->{$k};
 867             }
 868             $res = -$res if ($rev eq '-' and $res);
 869             return $res if $res;
 870         }
 871         return 0;
 872     } @parts;
 873 }
 874
 875 sub sort_dumps {
 876     my ($keys, @dumps) = @_;
 877
 878     # TODO: make this more efficient by selecting the comparison
 879     # functions once, in advance, and just applying them
 880     return sort {
 881         my $res;
 882         for my $key (@$keys) {
 883             my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
 884
 885             if ($k =~ /^(nparts|level)$/) {
 886                 # compare dump components numerically
 887                 $res = $a->{$k} <=> $b->{$k};
 888             } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
 889                 # compare dump components alphabetically
 890                 $res = $a->{$k} cmp $b->{$k};
 891             }
 892             $res = -$res if ($rev eq '-' and $res);
 893             return $res if $res;
 894         }
 895         return 0;
 896     } @dumps;
 897 }
 898
 899 # caches for add_part() to avoid repeatedly looking up the log
 900 # filename for a particular write_timestamp.
 901 my $add_part_last_label = undef;
 902 my $add_part_last_write_timestamp = undef;
 903 my $add_part_last_logfile = undef;
 904
 905 sub add_part {
 906     my ($dump) = @_;
 907     my $found;
 908     my $logfh;
 909     my $logfile;
 910     my $find_result;
 911     my $logdir = getconf($CNF_LOGDIR);
 912     my ($last_filenum, $last_secs, $last_kbs);
 913
 914     # first order of business is to find out whether we need to make a new
 915     # dumpfile for this.
 916     my $write_timestamp = zeropad($dump->{'write_timestamp'});
 917     die "dump has no 'write_timestamp'" unless defined $write_timestamp;
 918
 919     # consult our one-element cache for this label and write_timestamp
 920     if (!defined $add_part_last_label
 921         or $add_part_last_label ne $dump->{'label'}
 922         or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
 923
 924         # update the cache
 925         $add_part_last_logfile = undef;
 926         LOGFILE:
 927         for my $lf (Amanda::Logfile::find_log()) {
 928             next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
 929             next unless (zeropad($log_timestamp) eq $write_timestamp);
 930
 931             # write timestamp matches; now check the label
 932             LOGFILE_DUMP:
 933             for $find_result (Amanda::Logfile::search_logfile(undef, undef,
 934                                         "$logdir/$lf", 1)) {
 935                 next unless (defined $find_result->{'label'});
 936
 937                 if ($find_result->{'label'} eq $dump->{'label'}) {
 938                     $add_part_last_label = $dump->{'label'};
 939                     $add_part_last_write_timestamp = $dump->{'write_timestamp'};
 940                     $add_part_last_logfile = $lf;
 941                     last LOGFILE;
 942                 }
 943             }
 944         }
 945     }
 946     $logfile = $add_part_last_logfile;
 947
 948     # truncate the write_timestamp if we're not using timestamps
 949     if (!getconf($CNF_USETIMESTAMPS)) {
 950         $write_timestamp = substr($write_timestamp, 0, 8);
 951     }
 952
 953     # get the information on the last dump and part in this logfile, or create
 954     # a new logfile if none exists, then open the logfile for writing.
 955     if (defined $logfile) {
 956         $last_filenum = -1;
 957
 958         # NOTE: this depends on an implementation detail of search_logfile: it
 959         # returns the results in the reverse order of appearance in the logfile.
 960         # Since we're concerned with the last elements of this logfile that we
 961         # will be appending to shortly, we simply reverse this list.  As this
 962         # package is rewritten to parse logfiles on its own (or access a relational
 963         # database), this implementation detail will no longer be relevant.
 964         my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
 965                                                     "$logdir/$logfile", 1);
 966         for $find_result (@find_results) {
 967             # filter out the non-dump error messages that find.c produces
 968             next unless (defined $find_result->{'label'});
 969
 970             $last_filenum = $find_result->{'filenum'};
 971
 972             # if this is part number 1, reset our secs and kbs counters on the
 973             # assumption that this is the beginning of a new dump
 974             if ($find_result->{'partnum'} == 1) {
 975                 $last_secs = $last_kbs = 0;
 976             }
 977             $last_secs += $find_result->{'sec'};
 978             $last_kbs += $find_result->{'kb'};
 979         }
 980
 981         open($logfh, ">>", "$logdir/$logfile");
 982     } else {
 983         $last_filenum = -1;
 984         $last_secs = 0;
 985         $last_kbs = 0;
 986
 987         # pick an unused log filename
 988         my $i = 0;
 989         while (1) {
 990             $logfile = "log.$write_timestamp.$i";
 991             last unless -f "$logdir/$logfile";
 992             $i++;
 993         }
 994
 995         open($logfh, ">", "$logdir/$logfile")
 996             or die("Could not write '$logdir/$logfile': $!");
 997
 998         print $logfh
 999             "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1000
1001         print $logfh
1002             "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1003
1004         if (!defined $tapelist) {
1005             _load_tapelist();
1006         } else {
1007             # reload the tapelist immediately, in case it's been modified
1008             $tapelist->reload();
1009         }
1010
1011         # see if we need to add an entry to the tapelist for this dump
1012         if (!grep { $_->{'label'} eq $dump->{'label'}
1013                     and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1014                 } @{$tapelist->{tles}}) {
1015             $tapelist->reload(1);
1016             $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
1017             $tapelist->write();
1018         }
1019     }
1020
1021     if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1022         warn "Discontinuity in filenums in $logfile: " .
1023              "from $last_filenum to $dump->{filenum}";
1024     }
1025
1026     my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
1027
1028     my $part_line = "PART taper ";
1029     $part_line .= "$dump->{label} ";
1030     $part_line .= "$dump->{filenum} ";
1031     $part_line .= quote_string($dump->{hostname}) . " ";
1032     $part_line .= quote_string($dump->{diskname}) . " ";
1033     $part_line .= "$dump->{dump_timestamp} ";
1034     $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1035     $part_line .= "$dump->{level} ";
1036     $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1037     print $logfh "$part_line\n";
1038
1039     # TODO: we don't always know nparts when writing a part, so
1040     # this is not always an effective way to detect a complete dump.
1041     # However, it works for purposes of data vaulting.
1042     if ($dump->{'partnum'} == $dump->{'nparts'}) {
1043         my $secs = $last_secs + $dump->{'sec'};
1044         my $kbs = $last_kbs + $dump->{'kb'};
1045         $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
1046
1047         my $done_line = "DONE taper ";
1048         $done_line .= quote_string($dump->{hostname}) ." ";
1049         $done_line .= quote_string($dump->{diskname}) ." ";
1050         $done_line .= "$dump->{dump_timestamp} ";
1051         $done_line .= "$dump->{nparts} ";
1052         $done_line .= "$dump->{level} ";
1053         $done_line .= "[sec $secs kb $kbs kps $kps]";
1054         print $logfh "$done_line\n";
1055     }
1056
1057     close($logfh);
1058 }
1059
1060 sub _load_tapelist {
1061     if (!defined $tapelist) {
1062         my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
1063         $tapelist = Amanda::Tapelist->new($tapelist_filename);
1064     }
1065 }
1066
1067 sub _clear_cache { # (used by installcheck)
1068     $tapelist = undef;
1069 }
1070
1071 1;