1 # Copyright (c) 2008-2012 Zmanda, Inc. All Rights Reserved.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 # You should have received a copy of the GNU General Public License along
14 # with this program; if not, write to the Free Software Foundation, Inc.,
15 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
18 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
20 package Amanda::DB::Catalog;
24 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
28 use Amanda::DB::Catalog;
30 # get all dump timestamps on record
31 my @timestamps = Amanda::DB::Catalog::get_timestamps();
33 # loop over those timestamps, printing dump info for each one
34 for my $timestamp (@timestamps) {
35 my @dumpfiles = Amanda::DB::Catalog::get_parts(
36 timestamp => $timestamp,
40 for my $dumpfile (@dumpfiles) {
41 print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
42 " level ", $dumpfile->{level}, "\n";
48 The Amanda catalog is modeled as a set of dumps comprised of parts. A dump is
49 a complete bytestream received from an application, and is uniquely identified
50 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
51 and C<write_timestamp>. A dump may be partial, or even a complete failure.
53 A part corresponds to a single file on a volume, containing a portion of the
54 data for a dump. A part, then, is completely specified by a volume label and a
55 file number (C<filenum>). Each part has, among other things, a part number
56 (C<partnum>) which gives its relative position within the dump. The bytestream
57 for a dump is recovered by concatenating all of the successful (C<status> = OK)
58 parts matching the dump.
60 Files in the holding disk are considered part of the catalog, and are
61 represented as single-part dumps (holding-disk chunking is ignored, as it is
62 distinct from split parts).
66 The dump table contains one row per dump. It has the following columns:
72 (string) -- timestamp of the run in which the dump was created
76 (string) -- timestamp of the run in which the part was written to this volume,
77 or C<"00000000000000"> for dumps in the holding disk.
81 (string) -- dump hostname
85 (string) -- dump diskname
89 (integer) -- dump level
93 (string) -- The status of the dump - "OK", "PARTIAL", or "FAIL". If a disk
94 failed to dump at all, then it is not part of the catalog and thus will not
95 have an associated dump row.
99 (string) -- reason for PARTIAL or FAIL status
103 (integer) -- number of successful parts in this dump
107 (integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.
111 (integer) -- size (in kb) of the dump on disk
115 (integer) -- size (in kb) of the complete dump (before compression or encryption); undef
120 (integer) -- time (in seconds) spent writing this dump
124 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
125 always C<undef>). When multiple partial parts are available, the choice of the
126 partial that is included in this array is undefined.
130 A dump is represented as a hashref with these keys.
132 The C<write_timestamp> gives the time of the amanda run in which the part was
133 written to this volume. The C<write_timestamp> may differ from the
134 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
139 The parts table contains one row per part, and has the following columns:
145 (string) -- volume label (not present for holding files)
149 (integer) -- file on that volume (not present for holding files)
153 (string) -- fully-qualified pathname of the holding file (not present for
158 (object ref) -- a reference to the dump containing this part
162 (string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
166 (integer) -- part number of a split part (1-based)
170 (integer) -- size (in kb) of this part
174 (integer) -- time (in seconds) spent writing this part
178 A part is represented as a hashref with these keys. The C<label> and
179 C<filenum> serve as a primary key.
181 Note that parts' C<dump> and dumps' C<parts> create a reference loop. This is
182 broken by making the C<dump> reference weak in C<get_dumps>,
183 and the C<parts> array's contents weak references in C<get_parts>.
187 All timestamps used in this module are full-length, in the format
188 C<YYYYMMDDHHMMSS>. If the underlying data contains only datestamps, they are
189 zero-extended into timestamps: C<YYYYMMDD000000>. A C<dump_timestamp> always
190 corresponds to the initiation of the I<original> dump run, while
191 C<write_timestamp> gives the time the file was written to the volume. When
192 parts are migrated from volume to volume (e.g., by I<amvault>), the
193 C<dump_timestamp> does not change.
195 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
196 serves as a unique identifier for a dump bytestream, but because the bytestream
197 may appear several times in the catalog (due to vaulting) the additional
198 C<write_timestamp> is required to identify a particular on-storage instance of
199 a dump. Note that the part sizes may differ between instances, so it is not
200 valid to concatenate parts from different dump instances.
206 The following functions provide summary data based on the contents of the
211 =item get_write_timestamps()
213 Get a list of all write timestamps, sorted in chronological order.
215 =item get_latest_write_timestamp()
217 Return the most recent write timestamp.
219 =item get_latest_write_timestamp(type => 'amvault')
220 =item get_latest_write_timestamp(types => [ 'amvault', .. ])
222 Return the timestamp of the most recent dump of the given type or types. The
223 available types are given below for C<get_run_type>.
225 =item get_labels_written_at_timestamp($ts)
227 Return a list of labels for volumes written at the given timestamp.
229 =item get_run_type($ts)
231 Return the type of run made at the given timestamp. The result is one of
232 C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
240 =item get_parts(%parameters)
242 This function returns a sequence of parts. Values in C<%parameters> restrict
243 the set of parts that are returned. The hash can have any of the following
248 =item write_timestamp
250 restrict to parts written at this timestamp
252 =item write_timestamps
254 (arrayref) restrict to parts written at any of these timestamps (note that
255 holding-disk files have no C<write_timestamp>, so this option and the previous
260 restrict to parts with exactly this timestamp
262 =item dump_timestamps
264 (arrayref) restrict to parts with any of these timestamps
266 =item dump_timestamp_match
268 restrict to parts with timestamps matching this expression
272 if true, only return dumps on holding disk. If false, omit dumps on holding
277 restrict to parts with exactly this hostname
281 (arrayref) restrict to parts with any of these hostnames
285 restrict to parts with hostnames matching this expression
289 restrict to parts with exactly this diskname
293 (arrayref) restrict to parts with any of these disknames
297 restrict to parts with disknames matching this expression
301 restrict to parts with exactly this label
305 (arrayref) restrict to parts with any of these labels
309 restrict to parts with exactly this level
313 (arrayref) restrict to parts with any of these levels
317 restrict to parts with this status
321 (arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
325 Match expressions are described in the amanda(8) manual page.
327 =item sort_parts([ $key1, $key2, .. ], @parts)
329 Given a list of parts, this function sorts that list by the requested keys.
330 The following keys are available:
338 =item write_timestamp
348 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
356 Keys are processed from left to right: if two dumps have the same value for
357 C<$key1>, then C<$key2> is examined, and so on. Key names may be prefixed by a
358 dash (C<->) to reverse the order.
360 Note that some of these keys are dump keys; the function will automatically
361 access those values via the C<dump> attribute.
369 =item get_dumps(%parameters)
371 This function returns a sequence of dumps. Values in C<%parameters> restrict
372 the set of dumps that are returned. The same keys as are used for C<get_parts>
373 are available here, with the exception of C<label> and C<labels>. In this
374 case, the C<status> parameter applies to the dump status, not the status of its
377 =item sort_dumps([ $key1, $key2 ], @dumps)
379 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
380 The same keys are available, with the exception of C<label>, C<filenum>, and
389 =item add_part($part)
391 Add the given part to the database. In terms of logfiles, this will either
392 create a new logfile (if the part's C<write_timestamp> has not been seen
393 before) or append to an existing logfile. Note that a new logfile will require
394 a corresponding new entry in the tapelist.
396 Note that no locking is performed: multiple simultaneous calls to this function
397 can result in a corrupted or incorrect logfile.
405 use Amanda::Logfile qw( :constants );
406 use Amanda::Tapelist;
407 use Amanda::Config qw( :init :getconf config_dir_relative );
408 use Amanda::Util qw( quote_string weaken_ref match_disk match_host match_datestamp match_level);
409 use File::Glob qw( :glob );
# File-scoped cache of the Amanda::Tapelist object.  It starts out undef and
# is filled in by _load_tapelist() the first time the tapelist is needed.
414 my $tapelist = undef;
# NOTE(review): the enclosing "sub" line is not visible in this excerpt; the
# rest of the file calls a helper named zeropad() with this behaviour, so this
# is presumably its body -- confirm.
#
# Takes a single timestamp string.  A value that is exactly 8 characters long
# (a bare YYYYMMDD datestamp) is extended to the 14-character
# YYYYMMDDHHMMSS form by appending six zeros.
418 my ($timestamp) = @_;
419 if (length($timestamp) == 8) {
# e.g. "20120131" becomes "20120131000000"
420 return $timestamp."000000";
# get_write_timestamps()
#
# Derives write timestamps from the names of the logfiles reported by
# Amanda::Logfile::find_log(): each matching name contributes its timestamp,
# zero-extended by zeropad(), to @rv.
# NOTE(review): the POD promises a chronologically sorted list; the lines that
# declare, sort and return @rv are not visible in this excerpt -- confirm.
425 sub get_write_timestamps {
428 # find_log assumes that the tapelist has been loaded, so load it now
431 for (Amanda::Logfile::find_log()) {
# accept names of the form "log.<digits>", optionally followed by
# ".<digits>" or ".amflush"; any other name is skipped
432 next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
433 push @rv, zeropad($timestamp);
# get_latest_write_timestamp(%params)
#
# Returns the most recent write timestamp.  Recognised parameters:
#   type  => a single run type to restrict the search to
#   types => arrayref of acceptable run types
# NOTE(review): the line that unpacks the arguments into %params is not
# visible in this excerpt.
439 sub get_latest_write_timestamp {
# fold the singular 'type' parameter into the 'types' list
442 if ($params{'type'}) {
443 push @{$params{'types'}}, $params{'type'};
446 # get all of the timestamps and select the last one
447 my @timestamps = get_write_timestamps();
450 # if we're not looking for a particular type, then this is easy
451 if (!exists $params{'types'}) {
# the final element of the list from get_write_timestamps()
452 return $timestamps[-1];
455 # otherwise we need to search backward until we find a logfile of
# (the rest of the comment above is not visible in this excerpt; the loop
# below accepts a timestamp whose run type is one of the requested types)
457 while (@timestamps) {
# take candidates from the end of the list, one at a time
458 my $ts = pop @timestamps;
459 my $typ = get_run_type($ts);
460 if (grep { $_ eq $typ } @{$params{'types'}}) {
# NOTE(review): the enclosing "sub" line is not visible in this excerpt; this
# is presumably the body of get_run_type() described in the POD -- confirm.
#
# Takes a write timestamp, looks at the logfiles named after it, and returns
# "amflush" or "amvault" when a START line names one of those programs, or
# "amdump" when a START line names the planner.
470 my ($write_timestamp) = @_;
472 # find all of the logfiles with that name
# NOTE(review): the log directory is used exactly as getconf() returns it,
# whereas get_parts_and_dumps passes the same setting through
# config_dir_relative() -- confirm the difference is intended.
473 my $logdir = getconf($CNF_LOGDIR);
474 my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
# a timestamp ending in 000000 may have been zero-extended from an 8-digit
# datestamp, so also look for logfiles named with just the datestamp
475 if ($write_timestamp =~ /000000$/) {
476 my $write_datestamp = substr($write_timestamp, 0, 8);
477 push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
480 for my $lf (@matches) {
# a logfile that cannot be opened is silently skipped
481 open(my $fh, "<", $lf) or next;
483 # amflush and amvault put their own names in
484 return $1 if (/^START (amflush|amvault)/);
485 # but for amdump we see planner
486 return 'amdump' if (/^START planner/);
494 # this generic function implements the loop of scanning logfiles to find
495 # the requested data; get_parts and get_dumps then adjust the results to
496 # match what the user expects.
#
# Arguments: the first argument selects the mode ("parts" or "dumps"); the
# filters are then read from %params.
# NOTE(review): the line that unpacks the remaining arguments into %params,
# and the declarations of @logfiles, %dumps and @parts, are not visible in
# this excerpt.
# Returns two values: an arrayref holding the dump hashes and a reference to
# the @parts array.
497 sub get_parts_and_dumps {
498 my $get_what = shift; # "parts" or "dumps"
500 my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
502 # find_log assumes that the tapelist has been loaded, so load it now
505 # pre-process params by appending all of the "singular" parameters to the "plurals"
# (the singular timestamps are zero-extended on the way in)
506 push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
507 if exists($params{'write_timestamp'});
508 push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
509 if exists($params{'dump_timestamp'});
510 push @{$params{'hostnames'}}, $params{'hostname'}
511 if exists($params{'hostname'});
512 push @{$params{'disknames'}}, $params{'diskname'}
513 if exists($params{'diskname'});
514 push @{$params{'levels'}}, $params{'level'}
515 if exists($params{'level'});
# label filters are only honoured when parts were requested
516 if ($get_what eq 'parts') {
517 push @{$params{'labels'}}, $params{'label'}
518 if exists($params{'label'});
# presumably the other branch (the line separating the branches is not
# visible here): any 'labels' filter is discarded
520 delete $params{'labels'};
523 # specifying write_timestamps implies we won't check holding files
524 if ($params{'write_timestamps'}) {
# asking for holding-disk entries and specific write timestamps at the
# same time can match nothing, so hand back two empty lists
525 if (defined $params{'holding'} and $params{'holding'}) {
526 return [], []; # well, that's easy..
528 $params{'holding'} = 0;
531 # Since we're working from logfiles, we have to pick the logfiles we'll use first.
532 # Then we can use search_logfile.
# 'holding' is used as a pseudo-logfile name meaning "scan the holding disk"
534 if ($params{'holding'}) {
535 @logfiles = ( 'holding', );
536 } elsif (exists($params{'write_timestamps'})) {
537 # if we have specific write_timestamps, the job is pretty easy.
# hash used as a set of the wanted write timestamps
538 my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
539 for my $logfile (Amanda::Logfile::find_log()) {
540 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
541 next unless (exists($timestamps_hash{zeropad($timestamp)}));
542 push @logfiles, $logfile;
544 } elsif (exists($params{'dump_timestamps'})) {
545 # otherwise, we need only look in logfiles at or after the earliest dump timestamp
# string sort and 'ge' are used for ordering -- assumes every entry is a
# full-length timestamp; TODO confirm for caller-supplied 'dump_timestamps'
546 my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
547 my $earliest_timestamp = $sorted_timestamps[0];
548 for my $logfile (Amanda::Logfile::find_log()) {
549 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
550 next unless (zeropad($timestamp) ge $earliest_timestamp);
551 push @logfiles, $logfile;
554 # oh well -- it looks like we'll have to read all existing logfiles.
555 @logfiles = Amanda::Logfile::find_log();
558 # Set up some hash tables for speedy lookups of various attributes
# each hash is used as a set (only key existence is tested); a hash that
# stays empty means "no restriction on this attribute"
559 my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
560 %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
561 if (exists($params{'dump_timestamps'}));
562 %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
563 if (exists($params{'hostnames'}));
564 %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
565 if (exists($params{'disknames'}));
566 %levels_hash = map { ($_, undef) } @{$params{'levels'}}
567 if (exists($params{'levels'}));
568 %labels_hash = map { ($_, undef) } @{$params{'labels'}}
569 if (exists($params{'labels'}));
574 # *also* scan holding if the holding param wasn't specified
# (when write_timestamps were given, 'holding' was set to 0 above, so the
# key exists and the holding disk is not added here)
575 if (!exists $params{'holding'}) {
576 push @logfiles, 'holding';
579 # now loop over those logfiles and use search_logfile to load the dumpfiles
580 # from them, then process each entry from the logfile
581 for my $logfile (@logfiles) {
582 my (@find_results, $write_timestamp);
584 # get the raw contents from search_logfile, or use holding if
586 if ($logfile ne 'holding') {
587 @find_results = Amanda::Logfile::search_logfile(undef, undef,
588 "$logfile_dir/$logfile", 1);
589 # convert to dumpfile hashes, including the write_timestamp from the logfile name
590 my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
591 $write_timestamp = zeropad($timestamp);
# holding-disk entries carry the all-zeros write timestamp
594 @find_results = Amanda::Logfile::search_holding_disk();
595 $write_timestamp = '00000000000000';
598 # filter against *_match with dumps_match
599 @find_results = Amanda::Logfile::dumps_match([@find_results],
600 exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
601 exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
602 exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
606 # loop over each entry in the logfile.
607 for my $find_result (@find_results) {
609 # filter out the non-dump error messages that find.c produces
610 next unless (defined $find_result->{'label'});
612 # bail out on this result early, if possible
613 next if (%dump_timestamps_hash
614 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
615 next if (%hostnames_hash
616 and !exists($hostnames_hash{$find_result->{'hostname'}}));
617 next if (%disknames_hash
618 and !exists($disknames_hash{$find_result->{'diskname'}}));
619 next if (%levels_hash
620 and !exists($levels_hash{$find_result->{'level'}}));
621 next if (%labels_hash
622 and !exists($labels_hash{$find_result->{'label'}}));
# part-status filtering happens here; a result whose status is undefined
# is not rejected by this test
623 if ($get_what eq 'parts') {
624 next if (exists($params{'status'})
625 and defined $find_result->{'status'}
626 and $find_result->{'status'} ne $params{'status'});
629 # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
630 # tendency to produce duplicate results
631 next if ($params{'dumpspecs'}
632 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
633 $params{'dumpspecs'}, 0));
635 my $dump_timestamp = zeropad($find_result->{'timestamp'});
# dumps are keyed on hostname, diskname, write timestamp, level and dump
# timestamp, joined with NUL characters; the first result seen for a key
# creates the dump hash
637 my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
638 $write_timestamp, $find_result->{'level'}, $dump_timestamp);
639 my $dump = $dumps{$dumpkey};
640 if (!defined $dump) {
641 $dump = $dumps{$dumpkey} = {
642 dump_timestamp => $dump_timestamp,
643 write_timestamp => $write_timestamp,
644 hostname => $find_result->{'hostname'},
645 diskname => $find_result->{'diskname'},
646 level => $find_result->{'level'}+0,
647 orig_kb => $find_result->{'orig_kb'},
648 status => $find_result->{'dump_status'},
649 message => $find_result->{'message'},
650 # the rest of these params are unknown until we see a taper
651 # DONE, PARTIAL, or FAIL line, although we count nparts
652 # manually instead of relying on the logfile
653 nparts => 0, # $find_result->{'totalparts'}
654 bytes => -1, # $find_result->{'bytes'}
655 kb => -1, # $find_result->{'kb'}
656 sec => -1, # $find_result->{'sec'}
660 # start setting up a part hash for this result
# (several lines of the two part-hash constructions below are not visible
# in this excerpt)
662 if ($logfile ne 'holding') {
665 label => $find_result->{'label'},
666 filenum => $find_result->{'filenum'},
668 status => $find_result->{'status'} || 'FAILED',
669 sec => $find_result->{'sec'},
670 kb => $find_result->{'kb'},
671 orig_kb => $find_result->{'orig_kb'},
672 partnum => $find_result->{'partnum'},
# for holding-disk results the 'label' field supplies the holding file
677 holding_file => $find_result->{'label'},
679 status => $find_result->{'status'} || 'FAILED',
681 kb => $find_result->{'kb'},
682 orig_kb => $find_result->{'orig_kb'},
685 # and fix up the dump, too
# presumably still inside the holding-disk branch, where the single part's
# figures double as the dump's figures -- confirm against the full file
686 $dump->{'status'} = $find_result->{'status'} || 'FAILED';
687 $dump->{'bytes'} = $find_result->{'bytes'};
688 $dump->{'kb'} = $find_result->{'kb'};
689 $dump->{'sec'} = $find_result->{'sec'};
692 # weaken the dump ref if we're returning dumps
693 weaken_ref($part{'dump'})
694 if ($get_what eq 'dumps');
696 # count the number of successful parts in the dump
697 $dump->{'nparts'}++ if $part{'status'} eq 'OK';
699 # and add a ref to the array of parts; if we're getting
700 # parts, then this is a weak ref
701 $dump->{'parts'}[$part{'partnum'}] = \%part;
702 weaken_ref($dump->{'parts'}[$part{'partnum'}])
703 if ($get_what eq 'parts');
708 # if these dumps were on the holding disk, then we're done
709 next if $logfile eq 'holding';
711 # re-read the logfile to extract dump-level info that's not captured by
713 my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
714 die "logfile '$logfile' not found" unless $logh;
715 while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
# only log lines written by the taper are of interest here
716 next unless $prog == $P_TAPER;
# dispatch on the line type; the branch bodies are not visible in this
# excerpt (presumably they set $status, which is tested below)
718 if ($type == $L_DONE) {
720 } elsif ($type == $L_PARTIAL) {
722 } elsif ($type == $L_FAIL) {
724 } elsif ($type == $L_SUCCESS) {
730 # now extract the appropriate info; luckily these log lines have the same
731 # format, more or less
# fields are peeled off the front of $str one at a time
732 my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
733 ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
734 ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
735 ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
736 if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
# peek at what follows the candidate nparts field: a leading '[' means the
# field just read was not nparts
737 ($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
738 if (substr($str1, 0,1) ne '[') {
740 } else { # nparts is not in all PARTIAL lines
747 ($level, $str) = Amanda::Util::skip_quoted_string($str);
748 if ($status ne 'FAIL') {
# parse the bracketed statistics: seconds, the size unit ("kb" or "bytes"),
# the size itself, and whatever text follows the closing bracket
751 ($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
753 if ($b_unit eq 'bytes') {
# non-positive durations are replaced with 0.1 seconds
759 $secs = 0.1 if ($secs <= 0);
761 if ($status ne 'OK') {
767 $hostname = Amanda::Util::unquote_string($hostname);
768 $diskname = Amanda::Util::unquote_string($diskname);
769 $message = Amanda::Util::unquote_string($message) if $message;
771 # filter against dump criteria
772 next if ($params{'dump_timestamp_match'}
773 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
774 next if (%dump_timestamps_hash
775 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
777 next if ($params{'hostname_match'}
778 and !match_host($params{'hostname_match'}, $hostname));
779 next if (%hostnames_hash
780 and !exists($hostnames_hash{$hostname}));
782 next if ($params{'diskname_match'}
783 and !match_disk($params{'diskname_match'}, $diskname));
784 next if (%disknames_hash
785 and !exists($disknames_hash{$diskname}));
787 next if (%levels_hash
788 and !exists($levels_hash{$level}));
789 # get_dumps filters on status
791 if ($params{'dumpspecs'}) {
# inside this loop 'next' moves on to the next dumpspec as soon as one of
# its defined fields fails to match
# NOTE(review): the datestamp test below uses the raw $dump_timestamp,
# while the filters above use zeropad($dump_timestamp) -- confirm intent
793 for my $ds (@{$params{'dumpspecs'}}) {
794 # (the "". are for SWIG's benefit - SWIGged functions don't like
795 # strings generated by SWIG. Long story.)
796 next if (defined $ds->{'host'}
797 and !match_host("".$ds->{'host'}, $hostname));
798 next if (defined $ds->{'disk'}
799 and !match_disk("".$ds->{'disk'}, $diskname));
800 next if (defined $ds->{'datestamp'}
801 and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
802 next if (defined $ds->{'level'}
803 and !match_level("".$ds->{'level'}, $level));
804 next if (defined $ds->{'write_timestamp'}
805 and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
# same key layout as in the first pass, so this finds the dump hash built
# from the part results, if there is one
812 my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
813 $level, zeropad($dump_timestamp));
814 my $dump = $dumps{$dumpkey};
815 if (!defined $dump) {
816 # this will happen when a dump has no parts - a FAILed dump.
817 $dump = $dumps{$dumpkey} = {
818 dump_timestamp => zeropad($dump_timestamp),
819 write_timestamp => $write_timestamp,
820 hostname => $hostname,
821 diskname => $diskname,
826 nparts => $nparts, # hopefully 0?
# record the dump-level figures taken from the taper line; a FAIL line
# zeroes the counters instead
832 $dump->{'message'} = $message;
833 if ($status eq 'FAIL') {
834 $dump->{'bytes'} = 0;
836 $dump->{'sec'} = 0.0;
838 $dump->{'bytes'} = $bytes+0;
839 $dump->{'kb'} = $kb+0;
840 $dump->{'sec'} = $secs+0.0;
843 Amanda::Logfile::close_logfile($logh);
# hand back every dump hash collected (in hash order) plus the parts array
846 return [ values %dumps], \@parts;
# NOTE(review): the enclosing "sub" line and the rest of the body are not
# visible in this excerpt; this is presumably get_parts() from the POD.
# Runs the shared scanner in "parts" mode, forwarding the caller's arguments
# unchanged; it hands back the dumps list and the parts list.
850 my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
# NOTE(review): the enclosing "sub" line, the unpacking of %params and @dumps,
# and the return are not visible in this excerpt; this is presumably
# get_dumps() from the POD.
# Runs the shared scanner in "dumps" mode, forwarding the caller's arguments.
856 my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
# the 'status' filter is applied here, to each dump's own status
859 if (exists $params{'status'}) {
860 @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
# NOTE(review): the enclosing "sub" line and the sort call that supplies
# $a and $b are not visible in this excerpt; this is presumably the body of
# sort_parts() from the POD.
#
# Arguments: an arrayref of key names followed by the parts to sort.  Keys are
# tried in order; a leading "-" on a key name reverses that key's ordering.
867 my ($keys, @parts) = @_;
869 # TODO: make this more efficient by selecting the comparison
870 # functions once, in advance, and just applying them
873 for my $key (@$keys) {
# split an optional leading "-" (reverse flag) from the key name
874 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
876 if ($k =~ /^(partnum|filenum)$/) {
877 # compare part components numerically
878 $res = $a->{$k} <=> $b->{$k};
879 } elsif ($k =~ /^(nparts|level)$/) {
880 # compare dump components numerically
881 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
882 } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
883 # compare dump components alphabetically
884 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
886 # compare part components alphabetically
887 $res = $a->{$k} cmp $b->{$k};
# flip the sign for reversed keys; a zero result is left alone
889 $res = -$res if ($rev eq '-' and $res);
# NOTE(review): the enclosing "sub" line and the sort call that supplies
# $a and $b are not visible in this excerpt; this is presumably the body of
# sort_dumps() from the POD.
#
# Arguments: an arrayref of key names followed by the dumps to sort.  Keys are
# tried in order; a leading "-" on a key name reverses that key's ordering.
# All values are read directly from the dump hashes.
897 my ($keys, @dumps) = @_;
899 # TODO: make this more efficient by selecting the comparison
900 # functions once, in advance, and just applying them
903 for my $key (@$keys) {
# split an optional leading "-" (reverse flag) from the key name
904 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
906 if ($k =~ /^(nparts|level|filenum)$/) {
907 # compare dump components numerically
908 $res = $a->{$k} <=> $b->{$k};
909 } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
910 # compare dump components alphabetically
911 $res = $a->{$k} cmp $b->{$k};
# flip the sign for reversed keys; a zero result is left alone
913 $res = -$res if ($rev eq '-' and $res);
920 # caches for add_part() to avoid repeatedly looking up the log
921 # filename for a particular write_timestamp.
# Together these form a one-element cache: the label and write_timestamp of
# the most recent lookup, and the logfile name that lookup found.
922 my $add_part_last_label = undef;
923 my $add_part_last_write_timestamp = undef;
924 my $add_part_last_logfile = undef;
# NOTE(review): the enclosing "sub" line and the unpacking of the argument
# into $dump are not visible in this excerpt; this is presumably the body of
# add_part() from the POD.  Despite its name, $dump is read here for part
# fields (label, filenum, partnum, sec, kb) as well as dump fields.
#
# Outline: locate the logfile that already covers this write_timestamp and
# label (or create a new one), make sure the tapelist has an entry for the
# label, then write a PART line and, for the final part, a DONE line.
932 my $logdir = getconf($CNF_LOGDIR);
933 my ($last_filenum, $last_secs, $last_kbs);
935 # first order of business is to find out whether we need to make a new
937 my $write_timestamp = zeropad($dump->{'write_timestamp'});
938 die "dump has no 'write_timestamp'" unless defined $write_timestamp;
940 # consult our one-element cache for this label and write_timestamp
# a miss (nothing cached, or a different label or write_timestamp) triggers
# a fresh scan of the logfiles
941 if (!defined $add_part_last_label
942 or $add_part_last_label ne $dump->{'label'}
943 or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
946 $add_part_last_logfile = undef;
948 for my $lf (Amanda::Logfile::find_log()) {
949 next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
950 next unless (zeropad($log_timestamp) eq $write_timestamp);
952 # write timestamp matches; now check the label
954 for $find_result (Amanda::Logfile::search_logfile(undef, undef,
956 next unless (defined $find_result->{'label'});
# remember the first logfile that mentions this label
958 if ($find_result->{'label'} eq $dump->{'label'}) {
959 $add_part_last_label = $dump->{'label'};
960 $add_part_last_write_timestamp = $dump->{'write_timestamp'};
961 $add_part_last_logfile = $lf;
# undef here means no existing logfile matched
967 $logfile = $add_part_last_logfile;
969 # truncate the write_timestamp if we're not using timestamps
970 if (!getconf($CNF_USETIMESTAMPS)) {
971 $write_timestamp = substr($write_timestamp, 0, 8);
974 # get the information on the last dump and part in this logfile, or create
975 # a new logfile if none exists, then open the logfile for writing.
976 if (defined $logfile) {
979 # NOTE: this depends on an implementation detail of search_logfile: it
980 # returns the results in the reverse order of appearance in the logfile.
981 # Since we're concerned with the last elements of this logfile that we
982 # will be appending to shortly, we simply reverse this list. As this
983 # package is rewritten to parse logfiles on its own (or access a relational
984 # database), this implementation detail will no longer be relevant.
985 my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
986 "$logdir/$logfile", 1);
987 for $find_result (@find_results) {
988 # filter out the non-dump error messages that find.c produces
989 next unless (defined $find_result->{'label'});
# after the loop this holds the filenum of the last entry in the logfile
991 $last_filenum = $find_result->{'filenum'};
993 # if this is part number 1, reset our secs and kbs counters on the
994 # assumption that this is the beginning of a new dump
995 if ($find_result->{'partnum'} == 1) {
996 $last_secs = $last_kbs = 0;
998 $last_secs += $find_result->{'sec'};
999 $last_kbs += $find_result->{'kb'};
# NOTE(review): unlike the open() for a brand-new logfile further down, the
# result of this append-mode open() is not checked
1002 open($logfh, ">>", "$logdir/$logfile");
1008 # pick an unused log filename
# candidate names are log.<write_timestamp>.<i>; the first one that is not an
# existing file is used (the loop header is not visible in this excerpt)
1011 $logfile = "log.$write_timestamp.$i";
1012 last unless -f "$logdir/$logfile";
1016 open($logfh, ">", "$logdir/$logfile")
1017 or die("Could not write '$logdir/$logfile': $!");
1020 "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1023 "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1025 if (!defined $tapelist) {
1028 # reload the tapelist immediately, in case it's been modified
1029 $tapelist->reload();
1032 # see if we need to add an entry to the tapelist for this dump
# an entry counts as present only when both the label and the zero-extended
# datestamp match
1033 if (!grep { $_->{'label'} eq $dump->{'label'}
1034 and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1035 } @{$tapelist->{tles}}) {
1036 $tapelist->reload(1);
1037 $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
# warn (but carry on) when the new part's filenum does not directly follow
# the last one recorded
1042 if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1043 warn "Discontinuity in filenums in $logfile: " .
1044 "from $last_filenum to $dump->{filenum}";
# kb per second for this part; 0.0 when 'sec' is zero or unset
1047 my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
1049 my $part_line = "PART taper ";
1050 $part_line .= "$dump->{label} ";
1051 $part_line .= "$dump->{filenum} ";
1052 $part_line .= quote_string($dump->{hostname}) . " ";
1053 $part_line .= quote_string($dump->{diskname}) . " ";
1054 $part_line .= "$dump->{dump_timestamp} ";
1055 $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1056 $part_line .= "$dump->{level} ";
1057 $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1058 print $logfh "$part_line\n";
1060 # TODO: we don't always know nparts when writing a part, so
1061 # this is not always an effective way to detect a complete dump.
1062 # However, it works for purposes of data vaulting.
1063 if ($dump->{'partnum'} == $dump->{'nparts'}) {
# totals for the whole dump: figures accumulated from the parts already in
# the logfile plus this final part
1064 my $secs = $last_secs + $dump->{'sec'};
1065 my $kbs = $last_kbs + $dump->{'kb'};
1066 $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
1068 my $done_line = "DONE taper ";
1069 $done_line .= quote_string($dump->{hostname}) ." ";
1070 $done_line .= quote_string($dump->{diskname}) ." ";
1071 $done_line .= "$dump->{dump_timestamp} ";
1072 $done_line .= "$dump->{nparts} ";
1073 $done_line .= "$dump->{level} ";
1074 $done_line .= "[sec $secs kb $kbs kps $kps]";
1075 print $logfh "$done_line\n";
# _load_tapelist()
#
# Private helper that fills the file-scoped $tapelist on demand: when it is
# still undefined, an Amanda::Tapelist object is constructed from the
# configured tapelist filename, resolved via config_dir_relative().  Takes no
# arguments.
1081 sub _load_tapelist {
1082 if (!defined $tapelist) {
1083 my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
1084 $tapelist = Amanda::Tapelist->new($tapelist_filename);
1088 sub _clear_cache { # (used by installcheck)