1 # Copyright (c) 2008, 2009, 2010 Zmanda, Inc. All Rights Reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License version 2 as published
5 # by the Free Software Foundation.
7 # This program is distributed in the hope that it will be useful, but
8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
9 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # You should have received a copy of the GNU General Public License along
13 # with this program; if not, write to the Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
19 package Amanda::DB::Catalog;
23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
27 use Amanda::DB::Catalog;
29 # get all dump timestamps on record
30 my @timestamps = Amanda::DB::Catalog::get_timestamps();
32 # loop over those timestamps, printing dump info for each one
33 for my $timestamp (@timestamps) {
34 my @dumpfiles = Amanda::DB::Catalog::get_parts(
35 timestamp => $timestamp,
39 for my $dumpfile (@dumpfiles) {
40 print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
41 " level ", $dumpfile->{level}, "\n";
47 The Amanda catalog is modeled as a set of dumps comprised of parts. A dump is
48 a complete bytestream received from an application, and is uniquely identified
49 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
50 and C<write_timestamp>. A dump may be partial, or even a complete failure.
52 A part corresponds to a single file on a volume, containing a portion of the
53 data for a dump. A part, then, is completely specified by a volume label and a
54 file number (C<filenum>). Each part has, among other things, a part number
55 (C<partnum>) which gives its relative position within the dump. The bytestream
56 for a dump is recovered by concatenating all of the successful (C<status> = OK)
57 parts matching the dump.
59 Files in the holding disk are considered part of the catalog, and are
60 represented as single-part dumps (holding-disk chunking is ignored, as it is
61 distinct from split parts).
65 The dump table contains one row per dump. It has the following columns:
71 (string) -- timestamp of the run in which the dump was created
75 (string) -- timestamp of the run in which the part was written to this volume,
76 or C<"00000000000000"> for dumps in the holding disk.
80 (string) -- dump hostname
84 (string) -- dump diskname
88 (integer) -- dump level
92 (string) -- The status of the dump - "OK", "PARTIAL", or "FAIL". If a disk
93 failed to dump at all, then it is not part of the catalog and thus will not
94 have an associated dump row.
98 (string) -- reason for PARTIAL or FAIL status
102 (integer) -- number of successful parts in this dump
106 (integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.
110 (integer) -- size (in kb) of the dump on disk
114 (integer) -- size (in kb) of the complete dump (before compression or encryption); undef
119 (integer) -- time (in seconds) spent writing this dump
123 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
124 always C<undef>). When multiple partial parts are available, the choice of the
125 partial that is included in this array is undefined.
129 A dump is represented as a hashref with these keys.
131 The C<write_timestamp> gives the time of the amanda run in which the part was
132 written to this volume. The C<write_timestamp> may differ from the
133 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
138 The parts table contains one row per part, and has the following columns:
144 (string) -- volume label (not present for holding files)
148 (integer) -- file on that volume (not present for holding files)
152 (string) -- fully-qualified pathname of the holding file (not present for
157 (object ref) -- a reference to the dump containing this part
161 (string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
165 (integer) -- part number of a split part (1-based)
169 (integer) -- size (in kb) of this part
173 (integer) -- time (in seconds) spent writing this part
177 A part is represented as a hashref with these keys. The C<label> and
178 C<filenum> serve as a primary key.
180 Note that parts' C<dump> and dumps' C<parts> create a reference loop. This is
181 broken by making the C<parts> array's contents weak references in C<get_parts>,
182 and the C<dump> reference weak in C<get_dumps>.
186 All timestamps used in this module are full-length, in the format
187 C<YYYYMMDDHHMMSS>. If the underlying data contains only datestamps, they are
188 zero-extended into timestamps: C<YYYYMMDD000000>. A C<dump_timestamp> always
189 corresponds to the initiation of the I<original> dump run, while
190 C<write_timestamp> gives the time the file was written to the volume. When
191 parts are migrated from volume to volume (e.g., by I<amvault>), the
192 C<dump_timestamp> does not change.
194 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
195 serves as a unique identifier for a dump bytestream, but because the bytestream
196 may appear several times in the catalog (due to vaulting) the additional
197 C<write_timestamp> is required to identify a particular on-storage instance of
198 a dump. Note that the part sizes may differ between instances, so it is not
199 valid to concatenate parts from different dump instances.
205 The following functions provide summary data based on the contents of the
210 =item get_write_timestamps()
212 Get a list of all write timestamps, sorted in chronological order.
214 =item get_latest_write_timestamp()
216 Return the most recent write timestamp.
218 =item get_latest_write_timestamp(type => 'amvault')
219 =item get_latest_write_timestamp(types => [ 'amvault', .. ])
221 Return the timestamp of the most recent dump of the given type or types. The
222 available types are given below for C<get_run_type>.
224 =item get_labels_written_at_timestamp($ts)
226 Return a list of labels for volumes written at the given timestamp.
228 =item get_run_type($ts)
230 Return the type of run made at the given timestamp. The result is one of
231 C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
239 =item get_parts(%parameters)
241 This function returns a sequence of parts. Values in C<%parameters> restrict
242 the set of parts that are returned. The hash can have any of the following
247 =item write_timestamp
249 restrict to parts written at this timestamp
251 =item write_timestamps
253 (arrayref) restrict to parts written at any of these timestamps (note that
254 holding-disk files have no C<write_timestamp>, so this option and the previous
259 restrict to parts with exactly this timestamp
261 =item dump_timestamps
263 (arrayref) restrict to parts with any of these timestamps
265 =item dump_timestamp_match
267 restrict to parts with timestamps matching this expression
271 if true, only return dumps on holding disk. If false, omit dumps on holding
276 restrict to parts with exactly this hostname
280 (arrayref) restrict to parts with any of these hostnames
284 restrict to parts with hostnames matching this expression
288 restrict to parts with exactly this diskname
292 (arrayref) restrict to parts with any of these disknames
296 restrict to parts with disknames matching this expression
300 restrict to parts with exactly this label
304 (arrayref) restrict to parts with any of these labels
308 restrict to parts with exactly this level
312 (arrayref) restrict to parts with any of these levels
316 restrict to parts with this status
320 (arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
324 Match expressions are described in the amanda(8) manual page.
326 =item sort_parts([ $key1, $key2, .. ], @parts)
328 Given a list of parts, this function sorts that list by the requested keys.
329 The following keys are available:
337 =item write_timestamp
347 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
355 Keys are processed from left to right: if two parts have the same value for
356 C<$key1>, then C<$key2> is examined, and so on. Key names may be prefixed by a
357 dash (C<->) to reverse the order.
359 Note that some of these keys are dump keys; the function will automatically
360 access those values via the C<dump> attribute.
368 =item get_dumps(%parameters)
370 This function returns a sequence of dumps. Values in C<%parameters> restrict
371 the set of dumps that are returned. The same keys as are used for C<get_parts>
372 are available here, with the exception of C<label> and C<labels>. In this
373 case, the C<status> parameter applies to the dump status, not the status of its
376 =item sort_dumps([ $key1, $key2 ], @dumps)
378 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
379 The same keys are available, with the exception of C<label>, C<filenum>, and
388 =item add_part($part)
390 Add the given part to the database. In terms of logfiles, this will either
391 create a new logfile (if the part's C<write_timestamp> has not been seen
392 before) or append to an existing logfile. Note that a new logfile will require
393 a corresponding new entry in the tapelist.
395 Note that no locking is performed: multiple simultaneous calls to this function
396 can result in a corrupted or incorrect logfile.
404 use Amanda::Logfile qw( :constants match_disk match_host
405 match_datestamp match_level );
406 use Amanda::Tapelist;
407 use Amanda::Config qw( :init :getconf config_dir_relative );
408 use Amanda::Util qw( quote_string weaken_ref );
409 use File::Glob qw( :glob );
# File-scoped handle on the Amanda::Tapelist object.  It starts out undef and
# is filled in by _load_tapelist() the first time it is needed.
414 my $tapelist = undef;
# Body of the timestamp-padding helper -- presumably the zeropad() that the
# rest of this file calls; its `sub` line is not visible in this listing.
# An 8-character datestamp (YYYYMMDD) gets "000000" appended so that it
# becomes a full 14-character YYYYMMDDHHMMSS timestamp.
# NOTE(review): the fall-through for other lengths is not visible here;
# presumably the argument is returned unchanged -- confirm.
418 my ($timestamp) = @_;
419 if (length($timestamp) == 8) {
420 return $timestamp."000000";
# get_write_timestamps: collect the write timestamp of every logfile reported
# by Amanda::Logfile::find_log().  Each name is matched against "log.<digits>"
# with an optional ".<digits>" or ".amflush" suffix; the captured digits are
# zero-padded to a full timestamp and pushed onto @rv.
# NOTE(review): the declaration of @rv, the tapelist load and the return
# statement are not visible in this listing.
425 sub get_write_timestamps {
428 # find_log assumes that the tapelist has been loaded, so load it now
431 for (Amanda::Logfile::find_log()) {
# names that do not look like logfiles are skipped
432 next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
433 push @rv, zeropad($timestamp);
# get_latest_write_timestamp(%params): return the newest write timestamp,
# optionally restricted to runs of particular types.
#
# Parameters (read from %params; its declaration is not visible here):
#   type  - a single run type; it is appended to the 'types' list
#   types - arrayref of acceptable run types
#
# Without 'types', the last element of get_write_timestamps() is returned.
# With 'types', timestamps are popped off the end of that list and each is
# classified with get_run_type() until one matches a requested type.
# NOTE(review): what is returned on a match, or when nothing matches, is on
# lines not visible in this listing.
439 sub get_latest_write_timestamp {
442 if ($params{'type'}) {
443 push @{$params{'types'}}, $params{'type'};
446 # get all of the timestamps and select the last one
447 my @timestamps = get_write_timestamps();
450 # if we're not looking for a particular type, then this is easy
451 if (!exists $params{'types'}) {
452 return $timestamps[-1];
455 # otherwise we need to search backward until we find a logfile of
457 while (@timestamps) {
458 my $ts = pop @timestamps;
459 my $typ = get_run_type($ts);
460 if (grep { $_ eq $typ } @{$params{'types'}}) {
# Body of the run-type lookup -- presumably get_run_type($ts), which is
# called elsewhere in this file; its `sub` line is not visible here.
# It globs the log directory for "log.<write_timestamp>.*" and, when the
# timestamp ends in 000000, also for the 8-digit datestamp form.  Each
# candidate file is then examined for a START line naming the program.
470 my ($write_timestamp) = @_;
472 # find all of the logfiles with that name
# NOTE(review): the log directory is used as returned by getconf() here,
# whereas get_parts_and_dumps passes it through config_dir_relative() first
# -- confirm whether a relative logdir is handled consistently.
473 my $logdir = getconf($CNF_LOGDIR);
474 my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
# a zero-padded timestamp may correspond to a logfile named with a bare datestamp
475 if ($write_timestamp =~ /000000$/) {
476 my $write_datestamp = substr($write_timestamp, 0, 8);
477 push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
480 for my $lf (@matches) {
# files that cannot be opened are silently skipped
481 open(my $fh, "<", $lf) or next;
# NOTE(review): the line-reading loop is not visible in this listing; the
# matches below operate on $_.
483 # amflush and amvault put their own names in
484 return $1 if (/^START (amflush|amvault)/);
485 # but for amdump we see planner
486 return 'amdump' if (/^START planner/);
494 # this generic function implements the loop of scanning logfiles to find
495 # the requested data; get_parts and get_dumps then adjust the results to
496 # match what the user expects.
#
# The first argument selects "parts" or "dumps" mode.  The mode controls
# whether label filters are honoured, whether the 'status' parameter is
# applied to individual parts, and which side of the part<->dump reference
# loop is weakened.  The remaining arguments are the filter parameters read
# from %params (its declaration is not visible in this listing).
#
# Returns two array references: the dumps gathered in %dumps, and @parts.
497 sub get_parts_and_dumps {
498 my $get_what = shift; # "parts" or "dumps"
500 my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
502 # find_log assumes that the tapelist has been loaded, so load it now
505 # pre-process params by appending all of the "singular" parameters to the "plurals"
# (the singular timestamps are zero-padded on the way in)
506 push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
507 if exists($params{'write_timestamp'});
508 push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
509 if exists($params{'dump_timestamp'});
510 push @{$params{'hostnames'}}, $params{'hostname'}
511 if exists($params{'hostname'});
512 push @{$params{'disknames'}}, $params{'diskname'}
513 if exists($params{'diskname'});
514 push @{$params{'levels'}}, $params{'level'}
515 if exists($params{'level'});
# label filters apply only when parts are requested
516 if ($get_what eq 'parts') {
517 push @{$params{'labels'}}, $params{'label'}
518 if exists($params{'label'});
# NOTE(review): presumably the other branch -- its `else` line is not
# visible in this listing.
520 delete $params{'labels'};
523 # specifying write_timestamps implies we won't check holding files
524 if ($params{'write_timestamps'}) {
525 if (defined $params{'holding'} and $params{'holding'}) {
526 return [], []; # well, that's easy..
528 $params{'holding'} = 0;
531 # Since we're working from logfiles, we have to pick the logfiles we'll use first.
532 # Then we can use search_logfile.
# 'holding' is a pseudo-logfile name: the main loop below recognises it and
# reads the holding disk instead of a logfile.
534 if ($params{'holding'}) {
535 @logfiles = ( 'holding', );
536 } elsif (exists($params{'write_timestamps'})) {
537 # if we have specific write_timestamps, the job is pretty easy.
538 my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
539 for my $logfile (Amanda::Logfile::find_log()) {
540 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
# pad the stamp from the filename before looking it up
541 next unless (exists($timestamps_hash{zeropad($timestamp)}));
542 push @logfiles, $logfile;
544 } elsif (exists($params{'dump_timestamps'})) {
545 # otherwise, we need only look in logfiles at or after the earliest dump timestamp
# (a plain string sort is used to find the earliest timestamp)
546 my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
547 my $earliest_timestamp = $sorted_timestamps[0];
548 for my $logfile (Amanda::Logfile::find_log()) {
549 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
550 next unless (zeropad($timestamp) ge $earliest_timestamp);
551 push @logfiles, $logfile;
554 # oh well -- it looks like we'll have to read all existing logfiles.
555 @logfiles = Amanda::Logfile::find_log();
558 # Set up some hash tables for speedy lookups of various attributes
# (only the keys matter; an empty hash means "no filter on this attribute")
559 my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
560 %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
561 if (exists($params{'dump_timestamps'}));
562 %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
563 if (exists($params{'hostnames'}));
564 %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
565 if (exists($params{'disknames'}));
566 %levels_hash = map { ($_, undef) } @{$params{'levels'}}
567 if (exists($params{'levels'}));
568 %labels_hash = map { ($_, undef) } @{$params{'labels'}}
569 if (exists($params{'labels'}));
574 # *also* scan holding if the holding param wasn't specified
575 if (!exists $params{'holding'}) {
576 push @logfiles, 'holding';
579 # now loop over those logfiles and use search_logfile to load the dumpfiles
580 # from them, then process each entry from the logfile
581 for my $logfile (@logfiles) {
582 my (@find_results, $write_timestamp);
584 # get the raw contents from search_logfile, or use holding if
586 if ($logfile ne 'holding') {
587 @find_results = Amanda::Logfile::search_logfile(undef, undef,
588 "$logfile_dir/$logfile", 1);
589 # convert to dumpfile hashes, including the write_timestamp from the logfile name
590 my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
591 $write_timestamp = zeropad($timestamp);
# holding-disk results get the all-zero write timestamp
594 @find_results = Amanda::Logfile::search_holding_disk();
595 $write_timestamp = '00000000000000';
598 # filter against *_match with dumps_match
# (parameters that were not supplied are passed as undef; the remaining
# arguments of this call are not visible in this listing)
599 @find_results = Amanda::Logfile::dumps_match([@find_results],
600 exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
601 exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
602 exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
606 # loop over each entry in the logfile.
607 for my $find_result (@find_results) {
609 # filter out the non-dump error messages that find.c produces
610 next unless (defined $find_result->{'label'});
612 # bail out on this result early, if possible
613 next if (%dump_timestamps_hash
614 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
615 next if (%hostnames_hash
616 and !exists($hostnames_hash{$find_result->{'hostname'}}));
617 next if (%disknames_hash
618 and !exists($disknames_hash{$find_result->{'diskname'}}));
619 next if (%levels_hash
620 and !exists($levels_hash{$find_result->{'level'}}));
621 next if (%labels_hash
622 and !exists($labels_hash{$find_result->{'label'}}));
# in "parts" mode 'status' filters individual parts; a result whose status
# is undefined is not rejected by this test
623 if ($get_what eq 'parts') {
624 next if (exists($params{'status'})
625 and defined $find_result->{'status'}
626 and $find_result->{'status'} ne $params{'status'});
629 # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
630 # tendency to produce duplicate results
631 next if ($params{'dumpspecs'}
632 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
633 $params{'dumpspecs'}, 0));
635 my $dump_timestamp = zeropad($find_result->{'timestamp'});
# a dump is keyed on hostname, diskname, write timestamp, level and dump
# timestamp, joined with NUL bytes
637 my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
638 $write_timestamp, $find_result->{'level'}, $dump_timestamp);
639 my $dump = $dumps{$dumpkey};
# first part seen for this dump: create the dump record
640 if (!defined $dump) {
641 $dump = $dumps{$dumpkey} = {
642 dump_timestamp => $dump_timestamp,
643 write_timestamp => $write_timestamp,
644 hostname => $find_result->{'hostname'},
645 diskname => $find_result->{'diskname'},
646 level => $find_result->{'level'}+0,
647 orig_kb => $find_result->{'orig_kb'},
648 status => $find_result->{'dump_status'},
649 message => $find_result->{'message'},
650 # the rest of these params are unknown until we see a taper
651 # DONE, PARTIAL, or FAIL line, although we count nparts
652 # manually instead of relying on the logfile
653 nparts => 0, # $find_result->{'totalparts'}
654 bytes => -1, # $find_result->{'bytes'}
655 kb => -1, # $find_result->{'kb'}
656 sec => -1, # $find_result->{'sec'}
660 # start setting up a part hash for this result
# parts read from a logfile carry a label and a file number
662 if ($logfile ne 'holding') {
665 label => $find_result->{'label'},
666 filenum => $find_result->{'filenum'},
668 status => $find_result->{'status'} || 'FAILED',
669 sec => $find_result->{'sec'},
670 kb => $find_result->{'kb'},
671 orig_kb => $find_result->{'orig_kb'},
672 partnum => $find_result->{'partnum'},
# for holding-disk results the 'label' field is stored as holding_file
677 holding_file => $find_result->{'label'},
679 status => $find_result->{'status'} || 'FAILED',
681 kb => $find_result->{'kb'},
682 orig_kb => $find_result->{'orig_kb'},
685 # and fix up the dump, too
# NOTE(review): presumably still inside the holding branch, where no taper
# log line will supply these values later -- confirm against the full file.
686 $dump->{'status'} = $find_result->{'status'} || 'FAILED';
687 $dump->{'bytes'} = $find_result->{'bytes'};
688 $dump->{'kb'} = $find_result->{'kb'};
689 $dump->{'sec'} = $find_result->{'sec'};
692 # weaken the dump ref if we're returning dumps
693 weaken_ref($part{'dump'})
694 if ($get_what eq 'dumps');
696 # count the number of successful parts in the dump
697 $dump->{'nparts'}++ if $part{'status'} eq 'OK';
699 # and add a ref to the array of parts; if we're getting
700 # parts, then this is a weak ref
701 $dump->{'parts'}[$part{'partnum'}] = \%part;
702 weaken_ref($dump->{'parts'}[$part{'partnum'}])
703 if ($get_what eq 'parts');
708 # if these dumps were on the holding disk, then we're done
709 next if $logfile eq 'holding';
711 # re-read the logfile to extract dump-level info that's not captured by
713 my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
714 die "logfile '$logfile' not found" unless $logh;
715 while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
# only taper lines are of interest
716 next unless $prog == $P_TAPER;
# dispatch on the log line type; the statements inside each branch
# (presumably setting $status) are not visible in this listing
718 if ($type == $L_DONE) {
720 } elsif ($type == $L_PARTIAL) {
722 } elsif ($type == $L_FAIL) {
724 } elsif ($type == $L_SUCCESS) {
730 # now extract the appropriate info; luckily these log lines have the same
731 # format, more or less
732 my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
# each skip_quoted_string call peels one field off the front of $str
733 ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
734 ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
735 ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
736 if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
# parse tentatively into $str1, then check whether the stats bracket follows
737 ($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
738 if (substr($str1, 0,1) ne '[') {
740 } else { # nparts is not in all PARTIAL lines
747 ($level, $str) = Amanda::Util::skip_quoted_string($str);
748 if ($status ne 'FAIL') {
# captures: seconds, the unit keyword (kb or bytes), the size, and any text
# following the closing bracket
751 ($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
753 if ($b_unit eq 'bytes') {
# non-positive durations are replaced with 0.1 seconds
759 $secs = 0.1 if ($secs <= 0);
761 if ($status ne 'OK') {
767 $hostname = Amanda::Util::unquote_string($hostname);
768 $diskname = Amanda::Util::unquote_string($diskname);
769 $message = Amanda::Util::unquote_string($message) if $message;
771 # filter against dump criteria
772 next if ($params{'dump_timestamp_match'}
773 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
774 next if (%dump_timestamps_hash
775 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
777 next if ($params{'hostname_match'}
778 and !match_host($params{'hostname_match'}, $hostname));
779 next if (%hostnames_hash
780 and !exists($hostnames_hash{$hostname}));
782 next if ($params{'diskname_match'}
783 and !match_disk($params{'diskname_match'}, $diskname));
784 next if (%disknames_hash
785 and !exists($disknames_hash{$diskname}));
787 next if (%levels_hash
788 and !exists($levels_hash{$level}));
789 # get_dumps filters on status
# dumpspecs are checked field by field here; a dumpspec field that is
# undefined places no constraint on the log line
791 if ($params{'dumpspecs'}) {
793 for my $ds (@{$params{'dumpspecs'}}) {
794 # (the "". are for SWIG's benefit - SWIGged functions don't like
795 # strings generated by SWIG. Long story.)
796 next if (defined $ds->{'host'}
797 and !match_host("".$ds->{'host'}, $hostname));
798 next if (defined $ds->{'disk'}
799 and !match_disk("".$ds->{'disk'}, $diskname));
800 next if (defined $ds->{'datestamp'}
801 and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
802 next if (defined $ds->{'level'}
803 and !match_level("".$ds->{'level'}, $level));
804 next if (defined $ds->{'write_timestamp'}
805 and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
# same key layout as used when the parts were collected above
812 my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
813 $level, zeropad($dump_timestamp));
814 my $dump = $dumps{$dumpkey};
815 if (!defined $dump) {
816 # this will happen when a dump has no parts - a FAILed dump.
817 $dump = $dumps{$dumpkey} = {
818 dump_timestamp => zeropad($dump_timestamp),
819 write_timestamp => $write_timestamp,
820 hostname => $hostname,
821 diskname => $diskname,
826 nparts => $nparts, # hopefully 0?
# record the dump-level values taken from the taper line
832 $dump->{'message'} = $message;
833 if ($status eq 'FAIL') {
834 $dump->{'bytes'} = 0;
836 $dump->{'sec'} = 0.0;
# (+0 / +0.0 force the parsed strings into numeric form)
838 $dump->{'bytes'} = $bytes+0;
839 $dump->{'kb'} = $kb+0;
840 $dump->{'sec'} = $secs+0.0;
843 Amanda::Logfile::close_logfile($logh);
846 return [ values %dumps], \@parts;
# Body of the parts accessor -- presumably get_parts(%parameters); its `sub`
# line and return statement are not visible in this listing.  It delegates
# to get_parts_and_dumps in "parts" mode, forwarding the caller's arguments.
850 my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
# Body of the dumps accessor -- presumably get_dumps(%parameters); its `sub`
# line, the setup of %params and @dumps, and the return statement are not
# visible in this listing.  It delegates to get_parts_and_dumps in "dumps"
# mode and then applies the optional 'status' filter to the dumps themselves.
856 my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
859 if (exists $params{'status'}) {
# keep only dumps whose status is exactly the requested one
860 @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
# Body of the part sorter -- presumably sort_parts([keys], @parts); its `sub`
# line and the enclosing sort call are not visible in this listing.
# $keys is an arrayref of key names and @parts is the list to sort.  Each key
# may carry a leading "-" to reverse its ordering.  The comparison used
# depends on the key:
#   partnum, filenum          - numeric, read from the part
#   nparts, level             - numeric, read from the part's dump
#   hostname, diskname,
#   write_timestamp,
#   dump_timestamp            - string, read from the part's dump
#   anything else             - string, read from the part
867 my ($keys, @parts) = @_;
869 # TODO: make this more efficient by selecting the comparison
870 # functions once, in advance, and just applying them
873 for my $key (@$keys) {
# split an optional leading "-" from the key name
874 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
876 if ($k =~ /^(partnum|filenum)$/) {
877 # compare part components numerically
878 $res = $a->{$k} <=> $b->{$k};
879 } elsif ($k =~ /^(nparts|level)$/) {
880 # compare dump components numerically
881 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
882 } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
883 # compare dump components alphabetically
884 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
886 # compare part components alphabetically
887 $res = $a->{$k} cmp $b->{$k};
# invert a non-zero result for "-key"
889 $res = -$res if ($rev eq '-' and $res);
# Body of the dump sorter -- presumably sort_dumps([keys], @dumps); its `sub`
# line and the enclosing sort call are not visible in this listing.
# $keys is an arrayref of key names, each optionally prefixed with "-" to
# reverse its ordering.  nparts and level are compared numerically; every
# other key is compared as a string.  All values are read directly from the
# dump hashes.
897 my ($keys, @dumps) = @_;
899 # TODO: make this more efficient by selecting the comparison
900 # functions once, in advance, and just applying them
903 for my $key (@$keys) {
# split an optional leading "-" from the key name
904 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
906 if ($k =~ /^(nparts|level)$/) {
907 # compare dump components numerically
908 $res = $a->{$k} <=> $b->{$k};
909 } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
910 # compare dump components alphabetically
911 $res = $a->{$k} cmp $b->{$k};
# invert a non-zero result for "-key"
913 $res = -$res if ($rev eq '-' and $res);
920 # caches for add_part() to avoid repeatedly looking up the log
921 # filename for a particular write_timestamp.
# Together these form a one-element cache: the label and write timestamp of
# the most recent lookup, and the logfile name that lookup found.
922 my $add_part_last_label = undef;
923 my $add_part_last_write_timestamp = undef;
924 my $add_part_last_logfile = undef;
# Body of add_part -- its `sub` line and argument unpacking are not visible
# in this listing.  The part being added is held in $dump, a hashref read
# for write_timestamp, label, filenum, hostname, diskname, dump_timestamp,
# partnum, nparts, level, sec and kb.
#
# Outline of the visible steps:
#   1. find (or take from the cache) the logfile whose timestamp and label
#      match this part;
#   2. if one exists, replay its entries to learn the last file number and
#      the running sec/kb totals, then open it for appending; otherwise pick
#      an unused "log.<timestamp>.<i>" name, create it, and write the
#      INFO/START header lines;
#   3. make sure the tapelist has an entry for this label and timestamp;
#   4. write a PART line, and a DONE line when partnum equals nparts.
# NOTE(review): the log directory is used as returned by getconf() here,
# whereas get_parts_and_dumps passes it through config_dir_relative() first
# -- confirm whether a relative logdir is handled consistently.
932 my $logdir = getconf($CNF_LOGDIR);
933 my ($last_filenum, $last_secs, $last_kbs);
935 # first order of business is to find out whether we need to make a new
937 my $write_timestamp = zeropad($dump->{'write_timestamp'});
938 die "dump has no 'write_timestamp'" unless defined $write_timestamp;
940 # consult our one-element cache for this label and write_timestamp
941 if (!defined $add_part_last_label
942 or $add_part_last_label ne $dump->{'label'}
943 or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
# cache miss: forget the cached logfile and search again
946 $add_part_last_logfile = undef;
948 for my $lf (Amanda::Logfile::find_log()) {
949 next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
950 next unless (zeropad($log_timestamp) eq $write_timestamp);
952 # write timestamp matches; now check the label
954 for $find_result (Amanda::Logfile::search_logfile(undef, undef,
# entries without a label are not dump entries
956 next unless (defined $find_result->{'label'});
958 if ($find_result->{'label'} eq $dump->{'label'}) {
# remember this match for subsequent calls
959 $add_part_last_label = $dump->{'label'};
960 $add_part_last_write_timestamp = $dump->{'write_timestamp'};
961 $add_part_last_logfile = $lf;
967 $logfile = $add_part_last_logfile;
969 # truncate the write_timestamp if we're not using timestamps
970 if (!getconf($CNF_USETIMESTAMPS)) {
971 $write_timestamp = substr($write_timestamp, 0, 8);
974 # get the information on the last dump and part in this logfile, or create
975 # a new logfile if none exists, then open the logfile for writing.
976 if (defined $logfile) {
979 # NOTE: this depends on an implementation detail of search_logfile: it
980 # returns the results in the reverse order of appearance in the logfile.
981 # Since we're concerned with the last elements of this logfile that we
982 # will be appending to shortly, we simply reverse this list. As this
983 # package is rewritten to parse logfiles on its own (or access a relational
984 # database), this implementation detail will no longer be relevant.
985 my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
986 "$logdir/$logfile", 1);
987 for $find_result (@find_results) {
988 # filter out the non-dump error messages that find.c produces
989 next unless (defined $find_result->{'label'});
# after the loop this holds the file number of the last entry seen
991 $last_filenum = $find_result->{'filenum'};
993 # if this is part number 1, reset our secs and kbs counters on the
994 # assumption that this is the beginning of a new dump
995 if ($find_result->{'partnum'} == 1) {
996 $last_secs = $last_kbs = 0;
998 $last_secs += $find_result->{'sec'};
999 $last_kbs += $find_result->{'kb'};
# NOTE(review): no error check is visible on this append-mode open, while
# the create path below dies on failure; the check may be on an elided line
# -- confirm.
1002 open($logfh, ">>", "$logdir/$logfile");
1008 # pick an unused log filename
# ($i comes from a loop header that is not visible in this listing)
1011 $logfile = "log.$write_timestamp.$i";
1012 last unless -f "$logdir/$logfile";
1016 open($logfh, ">", "$logdir/$logfile")
1017 or die("Could not write '$logdir/$logfile': $!");
# header lines written to a newly created logfile (the print statements
# themselves are on lines not visible here)
1020 "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1023 "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1025 if (!defined $tapelist) {
1028 # reload the tapelist immediately, in case it's been modified
1029 $tapelist->reload();
1032 # see if we need to add an entry to the tapelist for this dump
# an entry matches when both the label and the zero-padded datestamp agree
1033 if (!grep { $_->{'label'} eq $dump->{'label'}
1034 and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1035 } @{$tapelist->{tles}}) {
1036 $tapelist->reload(1);
1037 $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
# NOTE(review): $last_filenum is only assigned in the existing-logfile path
# visible above; whether it is initialised for a new logfile is on lines not
# visible here -- confirm it cannot be undef at this comparison.
1042 if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1043 warn "Discontinuity in filenums in $logfile: " .
1044 "from $last_filenum to $dump->{filenum}";
# throughput for this part; a zero or missing 'sec' yields 0.0 rather than
# a division by zero
1047 my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
# PART line: label, filenum, quoted host and disk, dump timestamp,
# partnum/nparts, level, then the bracketed statistics
1049 my $part_line = "PART taper ";
1050 $part_line .= "$dump->{label} ";
1051 $part_line .= "$dump->{filenum} ";
1052 $part_line .= quote_string($dump->{hostname}) . " ";
1053 $part_line .= quote_string($dump->{diskname}) . " ";
1054 $part_line .= "$dump->{dump_timestamp} ";
1055 $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1056 $part_line .= "$dump->{level} ";
1057 $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1058 print $logfh "$part_line\n";
1060 # TODO: we don't always know nparts when writing a part, so
1061 # this is not always an effective way to detect a complete dump.
1062 # However, it works for purposes of data vaulting.
1063 if ($dump->{'partnum'} == $dump->{'nparts'}) {
# totals for the whole dump: the running totals plus this part
1064 my $secs = $last_secs + $dump->{'sec'};
1065 my $kbs = $last_kbs + $dump->{'kb'};
1066 $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
# DONE line: quoted host and disk, dump timestamp, nparts, level, then the
# bracketed totals
1068 my $done_line = "DONE taper ";
1069 $done_line .= quote_string($dump->{hostname}) ." ";
1070 $done_line .= quote_string($dump->{diskname}) ." ";
1071 $done_line .= "$dump->{dump_timestamp} ";
1072 $done_line .= "$dump->{nparts} ";
1073 $done_line .= "$dump->{level} ";
1074 $done_line .= "[sec $secs kb $kbs kps $kps]";
1075 print $logfh "$done_line\n";
# _load_tapelist: lazily create the file-scoped $tapelist object.  When
# $tapelist is still undefined, the configured tapelist filename is resolved
# with config_dir_relative() and an Amanda::Tapelist object is built from it.
# Takes no arguments.
1081 sub _load_tapelist {
1082 if (!defined $tapelist) {
1083 my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
1084 $tapelist = Amanda::Tapelist->new($tapelist_filename);
1088 sub _clear_cache { # (used by installcheck)