1 # Copyright (c) 2008, 2009, 2010 Zmanda, Inc. All Rights Reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License version 2 as published
5 # by the Free Software Foundation.
7 # This program is distributed in the hope that it will be useful, but
8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
9 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # You should have received a copy of the GNU General Public License along
13 # with this program; if not, write to the Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 # Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
19 package Amanda::DB::Catalog;
23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
27 use Amanda::DB::Catalog;
29 # get all dump timestamps on record
30 my @timestamps = Amanda::DB::Catalog::get_timestamps();
32 # loop over those timestamps, printing dump info for each one
33 for my $timestamp (@timestamps) {
34 my @dumpfiles = Amanda::DB::Catalog::get_parts(
35 timestamp => $timestamp,
39 for my $dumpfile (@dumpfiles) {
40 print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
41 " level ", $dumpfile->{level}, "\n";
47 The Amanda catalog is modeled as a set of dumps comprised of parts. A dump is
48 a complete bytestream received from an application, and is uniquely identified
49 by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
50 and C<write_timestamp>. A dump may be partial, or even a complete failure.
52 A part corresponds to a single file on a volume, containing a portion of the
53 data for a dump. A part, then, is completely specified by a volume label and a
54 file number (C<filenum>). Each part has, among other things, a part number
55 (C<partnum>) which gives its relative position within the dump. The bytestream
56 for a dump is recovered by concatenating all of the successful (C<status> = OK)
57 parts matching the dump.
59 Files in the holding disk are considered part of the catalog, and are
60 represented as single-part dumps (holding-disk chunking is ignored, as it is
61 distinct from split parts).
65 The dump table contains one row per dump. It has the following columns:
71 (string) -- timestamp of the run in which the dump was created
75 (string) -- timestamp of the run in which the part was written to this volume,
76 or C<"00000000000000"> for dumps in the holding disk.
80 (string) -- dump hostname
84 (string) -- dump diskname
88 (integer) -- dump level
92 (string) -- The status of the dump - "OK", "PARTIAL", or "FAIL". If a disk
93 failed to dump at all, then it is not part of the catalog and thus will not
94 have an associated dump row.
98 (string) -- reason for PARTIAL or FAIL status
102 (integer) -- number of successful parts in this dump
106 (integer) -- size (in kb) of the dump on disk
110 (integer) -- size (in kb) of the complete dump (before compression or encryption); undef
115 (integer) -- time (in seconds) spent writing this dump
119 (arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
120 always C<undef>). When multiple partial parts are available, the choice of the
121 partial that is included in this array is undefined.
125 A dump is represented as a hashref with these keys.
127 The C<write_timestamp> gives the time of the amanda run in which the part was
128 written to this volume. The C<write_timestamp> may differ from the
129 C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
134 The parts table contains one row per part, and has the following columns:
140 (string) -- volume label (not present for holding files)
144 (integer) -- file on that volume (not present for holding files)
148 (string) -- fully-qualified pathname of the holding file (not present for
153 (object ref) -- a reference to the dump containing this part
157 (string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
161 (integer) -- part number of a split part (1-based)
165 (integer) -- size (in kb) of this part
169 (integer) -- time (in seconds) spent writing this part
173 A part is represented as a hashref with these keys. The C<label> and
174 C<filenum> serve as a primary key.
176 Note that parts' C<dump> and dumps' C<parts> create a reference loop. This is
177 broken by making the C<dump> reference weak in C<get_dumps>, and the C<parts>
178 array's contents weak references in C<get_parts>.
182 All timestamps used in this module are full-length, in the format
183 C<YYYYMMDDHHMMSS>. If the underlying data contains only datestamps, they are
184 zero-extended into timestamps: C<YYYYMMDD000000>. A C<dump_timestamp> always
185 corresponds to the initiation of the I<original> dump run, while
186 C<write_timestamp> gives the time the file was written to the volume. When
187 parts are migrated from volume to volume (e.g., by I<amvault>), the
188 C<dump_timestamp> does not change.
190 In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
191 serves as a unique identifier for a dump bytestream, but because the bytestream
192 may appear several times in the catalog (due to vaulting) the additional
193 C<write_timestamp> is required to identify a particular on-storage instance of
194 a dump. Note that the part sizes may differ between instances, so it is not
195 valid to concatenate parts from different dump instances.
201 The following functions provide summary data based on the contents of the
206 =item get_write_timestamps()
208 Get a list of all write timestamps, sorted in chronological order.
210 =item get_latest_write_timestamp()
212 Return the most recent write timestamp.
214 =item get_latest_write_timestamp(type => 'amvault')
215 =item get_latest_write_timestamp(types => [ 'amvault', .. ])
217 Return the timestamp of the most recent dump of the given type or types. The
218 available types are given below for C<get_run_type>.
220 =item get_labels_written_at_timestamp($ts)
222 Return a list of labels for volumes written at the given timestamp.
224 =item get_run_type($ts)
226 Return the type of run made at the given timestamp. The result is one of
227 C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
235 =item get_parts(%parameters)
237 This function returns a sequence of parts. Values in C<%parameters> restrict
238 the set of parts that are returned. The hash can have any of the following
243 =item write_timestamp
245 restrict to parts written at this timestamp
247 =item write_timestamps
249 (arrayref) restrict to parts written at any of these timestamps (note that
250 holding-disk files have no C<write_timestamp>, so this option and the previous
255 restrict to parts with exactly this timestamp
257 =item dump_timestamps
259 (arrayref) restrict to parts with any of these timestamps
261 =item dump_timestamp_match
263 restrict to parts with timestamps matching this expression
267 if true, only return dumps on holding disk. If false, omit dumps on holding
272 restrict to parts with exactly this hostname
276 (arrayref) restrict to parts with any of these hostnames
280 restrict to parts with hostnames matching this expression
284 restrict to parts with exactly this diskname
288 (arrayref) restrict to parts with any of these disknames
292 restrict to parts with disknames matching this expression
296 restrict to parts with exactly this label
300 (arrayref) restrict to parts with any of these labels
304 restrict to parts with exactly this level
308 (arrayref) restrict to parts with any of these levels
312 restrict to parts with this status
316 (arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
320 Match expressions are described in the amanda(8) manual page.
322 =item sort_parts([ $key1, $key2, .. ], @parts)
324 Given a list of parts, this function sorts that list by the requested keys.
325 The following keys are available:
333 =item write_timestamp
343 Note that this sorts labels I<lexically>, not necessarily in the order they were used!
351 Keys are processed from left to right: if two parts have the same value for
352 C<$key1>, then C<$key2> is examined, and so on. Key names may be prefixed by a
353 dash (C<->) to reverse the order.
355 Note that some of these keys are dump keys; the function will automatically
356 access those values via the C<dump> attribute.
364 =item get_dumps(%parameters)
366 This function returns a sequence of dumps. Values in C<%parameters> restrict
367 the set of dumps that are returned. The same keys as are used for C<get_parts>
368 are available here, with the exception of C<label> and C<labels>. In this
369 case, the C<status> parameter applies to the dump status, not the status of its
372 =item sort_dumps([ $key1, $key2 ], @dumps)
374 Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
375 The same keys are available, with the exception of C<label>, C<filenum>, and
384 =item add_part($part)
386 Add the given part to the database. In terms of logfiles, this will either
387 create a new logfile (if the part's C<write_timestamp> has not been seen
388 before) or append to an existing logfile. Note that a new logfile will require
389 a corresponding new entry in the tapelist.
391 Note that no locking is performed: multiple simultaneous calls to this function
392 can result in a corrupted or incorrect logfile.
400 use Amanda::Logfile qw( :constants match_disk match_host
401 match_datestamp match_level );
402 use Amanda::Tapelist;
403 use Amanda::Config qw( :init :getconf config_dir_relative );
404 use Amanda::Util qw( quote_string weaken_ref );
405 use File::Glob qw( :glob );
# Module-level tapelist handle.  It starts out undefined and is filled in
# with an Amanda::Tapelist object by _load_tapelist.
410 my $tapelist = undef;
# Normalize a stamp to full timestamp length: an 8-character datestamp
# (YYYYMMDD) is extended by appending "000000".
# NOTE(review): the handling of stamps of any other length is not visible
# in this excerpt -- confirm against the full source.
414 my ($timestamp) = @_;
415 if (length($timestamp) == 8) {
416 return $timestamp."000000";
# Collect the write timestamp embedded in the name of each logfile reported
# by Amanda::Logfile::find_log(), zero-padded to full length via zeropad().
421 sub get_write_timestamps {
424 # find_log assumes that the tapelist has been loaded, so load it now
427 for (Amanda::Logfile::find_log()) {
# accepted names: "log.<digits>", optionally followed by ".<digits>" or
# ".amflush"; anything else is skipped
428 next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
429 push @rv, zeropad($timestamp);
# Return the last entry of get_write_timestamps(), optionally restricted to
# runs whose type (as reported by get_run_type) is one of the requested
# types.  Recognized parameters: 'type' (a single type) and 'types'
# (an arrayref of types).
435 sub get_latest_write_timestamp {
438 if ($params{'type'}) {
# fold the singular 'type' into the 'types' list so only 'types' needs
# to be consulted below
439 push @{$params{'types'}}, $params{'type'};
442 # get all of the timestamps and select the last one
443 my @timestamps = get_write_timestamps();
446 # if we're not looking for a particular type, then this is easy
447 if (!exists $params{'types'}) {
448 return $timestamps[-1];
451 # otherwise we need to search backward until we find a logfile of
453 while (@timestamps) {
# consume the list from its end, checking the run type of each timestamp
454 my $ts = pop @timestamps;
455 my $typ = get_run_type($ts);
456 if (grep { $_ eq $typ } @{$params{'types'}}) {
# Determine which program produced the run at $write_timestamp by scanning
# the matching logfiles for a START line.  Returns 'amflush' or 'amvault'
# when one of those names follows START, and 'amdump' when a
# "START planner" line is found.
# NOTE(review): the POD documents 'unknown' as the fallback result; that
# return is not visible in this excerpt.
466 my ($write_timestamp) = @_;
468 # find all of the logfiles with that name
469 my $logdir = getconf($CNF_LOGDIR);
470 my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
# a timestamp ending in 000000 may have been zero-extended from a bare
# datestamp, so also look for logfiles named with just the first 8 digits
471 if ($write_timestamp =~ /000000$/) {
472 my $write_datestamp = substr($write_timestamp, 0, 8);
473 push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
476 for my $lf (@matches) {
# unreadable logfiles are silently skipped
477 open(my $fh, "<", $lf) or next;
479 # amflush and amvault put their own names in
480 return $1 if (/^START (amflush|amvault)/);
481 # but for amdump we see planner
482 return 'amdump' if (/^START planner/);
# Shared implementation behind get_parts and get_dumps.
#
# Arguments: $get_what ("parts" or "dumps"); the remaining arguments are
# presumably the filter parameters consulted below as %params.
# Returns two arrayrefs: the dumps accumulated in %dumps, and the parts list.
#
# The work happens in three stages: choose which logfiles (and/or the
# holding disk) to scan, build part and dump hashes from the search results,
# then re-read each logfile's taper lines to fill in dump-level fields.
490 # this generic function implements the loop of scanning logfiles to find
491 # the requested data; get_parts and get_dumps then adjust the results to
492 # match what the user expects.
493 sub get_parts_and_dumps {
494 my $get_what = shift; # "parts" or "dumps"
496 my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
498 # find_log assumes that the tapelist has been loaded, so load it now
501 # pre-process params by appending all of the "singular" parameters to the "plurals"
502 push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
503 if exists($params{'write_timestamp'});
504 push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
505 if exists($params{'dump_timestamp'});
506 push @{$params{'hostnames'}}, $params{'hostname'}
507 if exists($params{'hostname'});
508 push @{$params{'disknames'}}, $params{'diskname'}
509 if exists($params{'diskname'});
510 push @{$params{'levels'}}, $params{'level'}
511 if exists($params{'level'});
# label filtering only applies when parts are requested; otherwise any
# 'labels' parameter is discarded
512 if ($get_what eq 'parts') {
513 push @{$params{'labels'}}, $params{'label'}
514 if exists($params{'label'});
516 delete $params{'labels'};
519 # specifying write_timestamps implies we won't check holding files
520 if ($params{'write_timestamps'}) {
# asking for holding-only results together with write_timestamps can
# never match anything, so return two empty lists immediately
521 if (defined $params{'holding'} and $params{'holding'}) {
522 return [], []; # well, that's easy..
524 $params{'holding'} = 0;
527 # Since we're working from logfiles, we have to pick the logfiles we'll use first.
528 # Then we can use search_logfile.
# the pseudo-logfile name 'holding' stands for the holding disk
530 if ($params{'holding'}) {
531 @logfiles = ( 'holding', );
532 } elsif (exists($params{'write_timestamps'})) {
533 # if we have specific write_timestamps, the job is pretty easy.
534 my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
535 for my $logfile (Amanda::Logfile::find_log()) {
536 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
537 next unless (exists($timestamps_hash{zeropad($timestamp)}));
538 push @logfiles, $logfile;
540 } elsif (exists($params{'dump_timestamps'})) {
541 # otherwise, we need only look in logfiles at or after the earliest dump timestamp
# timestamps are fixed-width digit strings, so the default string sort
# and the string comparison 'ge' below order them chronologically
542 my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
543 my $earliest_timestamp = $sorted_timestamps[0];
544 for my $logfile (Amanda::Logfile::find_log()) {
545 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
546 next unless (zeropad($timestamp) ge $earliest_timestamp);
547 push @logfiles, $logfile;
550 # oh well -- it looks like we'll have to read all existing logfiles.
551 @logfiles = Amanda::Logfile::find_log();
554 # Set up some hash tables for speedy lookups of various attributes
# (only the keys matter; an empty hash means "no restriction")
555 my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
556 %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
557 if (exists($params{'dump_timestamps'}));
558 %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
559 if (exists($params{'hostnames'}));
560 %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
561 if (exists($params{'disknames'}));
562 %levels_hash = map { ($_, undef) } @{$params{'levels'}}
563 if (exists($params{'levels'}));
564 %labels_hash = map { ($_, undef) } @{$params{'labels'}}
565 if (exists($params{'labels'}));
570 # *also* scan holding if the holding param wasn't specified
571 if (!exists $params{'holding'}) {
572 push @logfiles, 'holding';
575 # now loop over those logfiles and use search_logfile to load the dumpfiles
576 # from them, then process each entry from the logfile
577 for my $logfile (@logfiles) {
578 my (@find_results, $write_timestamp);
580 # get the raw contents from search_logfile, or use holding if
582 if ($logfile ne 'holding') {
583 @find_results = Amanda::Logfile::search_logfile(undef, undef,
584 "$logfile_dir/$logfile", 1);
585 # convert to dumpfile hashes, including the write_timestamp from the logfile name
586 my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
587 $write_timestamp = zeropad($timestamp);
590 @find_results = Amanda::Logfile::search_holding_disk();
# holding-disk entries get the all-zero write timestamp
591 $write_timestamp = '00000000000000';
594 # filter against *_match with dumps_match
595 @find_results = Amanda::Logfile::dumps_match([@find_results],
596 exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
597 exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
598 exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
602 # loop over each entry in the logfile.
603 for my $find_result (@find_results) {
605 # filter out the non-dump error messages that find.c produces
606 next unless (defined $find_result->{'label'});
608 # bail out on this result early, if possible
609 next if (%dump_timestamps_hash
610 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
611 next if (%hostnames_hash
612 and !exists($hostnames_hash{$find_result->{'hostname'}}));
613 next if (%disknames_hash
614 and !exists($disknames_hash{$find_result->{'diskname'}}));
615 next if (%levels_hash
616 and !exists($levels_hash{$find_result->{'level'}}));
617 next if (%labels_hash
618 and !exists($labels_hash{$find_result->{'label'}}));
# the status filter is applied to individual results only when parts
# are requested; results with an undefined status are not rejected here
619 if ($get_what eq 'parts') {
620 next if (exists($params{'status'})
621 and defined $find_result->{'status'}
622 and $find_result->{'status'} ne $params{'status'});
625 # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
626 # tendency to produce duplicate results
627 next if ($params{'dumpspecs'}
628 and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
629 $params{'dumpspecs'}, 0));
631 my $dump_timestamp = zeropad($find_result->{'timestamp'});
# dumps are keyed on hostname, diskname, write_timestamp, level and
# dump_timestamp, joined with NUL characters
633 my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
634 $write_timestamp, $find_result->{'level'}, $dump_timestamp);
635 my $dump = $dumps{$dumpkey};
# first result seen for this key: create the dump record
636 if (!defined $dump) {
637 $dump = $dumps{$dumpkey} = {
638 dump_timestamp => $dump_timestamp,
639 write_timestamp => $write_timestamp,
640 hostname => $find_result->{'hostname'},
641 diskname => $find_result->{'diskname'},
642 level => $find_result->{'level'}+0,
643 orig_kb => $find_result->{'orig_kb'},
644 status => $find_result->{'dump_status'},
645 message => $find_result->{'message'},
646 # the rest of these params are unknown until we see a taper
647 # DONE, PARTIAL, or FAIL line, although we count nparts
648 # manually instead of relying on the logfile
655 # start setting up a part hash for this result
# parts found in a logfile carry label/filenum/partnum; holding-disk
# entries instead record the result's 'label' field as holding_file
657 if ($logfile ne 'holding') {
660 label => $find_result->{'label'},
661 filenum => $find_result->{'filenum'},
663 status => $find_result->{'status'} || 'FAILED',
664 sec => $find_result->{'sec'},
665 kb => $find_result->{'kb'},
666 orig_kb => $find_result->{'orig_kb'},
667 partnum => $find_result->{'partnum'},
672 holding_file => $find_result->{'label'},
674 status => $find_result->{'status'} || 'FAILED',
676 kb => $find_result->{'kb'},
677 orig_kb => $find_result->{'orig_kb'},
680 # and fix up the dump, too
681 $dump->{'status'} = $find_result->{'status'} || 'FAILED';
682 $dump->{'kb'} = $find_result->{'kb'};
683 $dump->{'sec'} = $find_result->{'sec'};
686 # weaken the dump ref if we're returning dumps
687 weaken_ref($part{'dump'})
688 if ($get_what eq 'dumps');
690 # count the number of successful parts in the dump
691 $dump->{'nparts'}++ if $part{'status'} eq 'OK';
693 # and add a ref to the array of parts; if we're getting
694 # parts, then this is a weak ref
# the part is stored at index partnum in the dump's parts array
695 $dump->{'parts'}[$part{'partnum'}] = \%part;
696 weaken_ref($dump->{'parts'}[$part{'partnum'}])
697 if ($get_what eq 'parts');
702 # if these dumps were on the holding disk, then we're done
703 next if $logfile eq 'holding';
705 # re-read the logfile to extract dump-level info that's not captured by
707 my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
708 die "logfile '$logfile' not found" unless $logh;
709 while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
# only lines logged by the taper are examined
710 next unless $prog == $P_TAPER;
712 if ($type == $L_DONE) {
714 } elsif ($type == $L_PARTIAL) {
716 } elsif ($type == $L_FAIL) {
718 } elsif ($type == $L_SUCCESS) {
724 # now extract the appropriate info; luckily these log lines have the same
725 # format, more or less
# each skip_quoted_string call peels one field off the front of $str
726 my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $message);
727 ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
728 ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
729 ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
730 if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
731 ($nparts, $str) = Amanda::Util::skip_quoted_string($str);
735 ($level, $str) = Amanda::Util::skip_quoted_string($str);
736 if ($status ne 'FAIL') {
# pull the seconds and kb figures out of the "[sec ... kb ...]" block;
# whatever follows the closing bracket is kept in $str
738 ($secs, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) kb (\d+).*\] ?(.*)$/)
# non-positive durations are replaced by 0.1
740 $secs = 0.1 if ($secs <= 0);
742 if ($status ne 'OK') {
748 $hostname = Amanda::Util::unquote_string($hostname);
749 $diskname = Amanda::Util::unquote_string($diskname);
750 $message = Amanda::Util::unquote_string($message) if $message;
752 # filter against dump criteria
753 next if ($params{'dump_timestamp_match'}
754 and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
755 next if (%dump_timestamps_hash
756 and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
758 next if ($params{'hostname_match'}
759 and !match_host($params{'hostname_match'}, $hostname));
760 next if (%hostnames_hash
761 and !exists($hostnames_hash{$hostname}));
763 next if ($params{'diskname_match'}
764 and !match_disk($params{'diskname_match'}, $diskname));
765 next if (%disknames_hash
766 and !exists($disknames_hash{$diskname}));
768 next if (%levels_hash
769 and !exists($levels_hash{$level}));
770 # get_dumps filters on status
772 if ($params{'dumpspecs'}) {
# a dumpspec is skipped as soon as one of its defined fields fails
# to match this log line
774 for my $ds (@{$params{'dumpspecs'}}) {
775 # (the "". are for SWIG's benefit - SWIGged functions don't like
776 # strings generated by SWIG. Long story.)
777 next if (defined $ds->{'host'}
778 and !match_host("".$ds->{'host'}, $hostname));
779 next if (defined $ds->{'disk'}
780 and !match_disk("".$ds->{'disk'}, $diskname));
781 next if (defined $ds->{'datestamp'}
782 and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
783 next if (defined $ds->{'level'}
784 and !match_level("".$ds->{'level'}, $level));
785 next if (defined $ds->{'write_timestamp'}
786 and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
# same key layout as used when building dumps from the search results
793 my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
794 $level, zeropad($dump_timestamp));
795 my $dump = $dumps{$dumpkey};
796 if (!defined $dump) {
797 # this will happen when a dump has no parts - a FAILed dump.
798 $dump = $dumps{$dumpkey} = {
799 dump_timestamp => zeropad($dump_timestamp),
800 write_timestamp => $write_timestamp,
801 hostname => $hostname,
802 diskname => $diskname,
807 nparts => $nparts, # hopefully 0?
813 $dump->{'message'} = $message;
814 if ($status eq 'FAIL') {
816 $dump->{'sec'} = 0.0;
# force numeric values for the size and duration taken from the log line
818 $dump->{'kb'} = $kb+0;
819 $dump->{'sec'} = $secs+0.0;
822 Amanda::Logfile::close_logfile($logh);
825 return [ values %dumps], \@parts;
# run the shared scanner in "parts" mode, passing the caller's arguments
# straight through
829 my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
# run the shared scanner in "dumps" mode, passing the caller's arguments
# straight through
835 my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
# the 'status' parameter is applied here, against each dump's own status
838 if (exists $params{'status'}) {
839 @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
# Sort a list of parts by the given keys.  $keys is an arrayref of key
# names; each key decides whether the comparison is numeric or string-based
# and whether the value is read from the part or from its 'dump' hash.
846 my ($keys, @parts) = @_;
848 # TODO: make this more efficient by selecting the comparison
849 # functions once, in advance, and just applying them
852 for my $key (@$keys) {
# split an optional leading '-' (reverse order) from the key name
853 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
855 if ($k =~ /^(partnum|filenum)$/) {
856 # compare part components numerically
857 $res = $a->{$k} <=> $b->{$k};
858 } elsif ($k =~ /^(nparts|level)$/) {
859 # compare dump components numerically
860 $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
861 } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
862 # compare dump components alphabetically
863 $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
865 # compare part components alphabetically
866 $res = $a->{$k} cmp $b->{$k};
# invert a nonzero result when the key was prefixed with '-'
868 $res = -$res if ($rev eq '-' and $res);
# Sort a list of dumps by the given keys.  $keys is an arrayref of key
# names; 'nparts' and 'level' compare numerically, every other key compares
# as a string.  All values are read directly from the dump hash.
876 my ($keys, @dumps) = @_;
878 # TODO: make this more efficient by selecting the comparison
879 # functions once, in advance, and just applying them
882 for my $key (@$keys) {
# split an optional leading '-' (reverse order) from the key name
883 my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
885 if ($k =~ /^(nparts|level)$/) {
886 # compare dump components numerically
887 $res = $a->{$k} <=> $b->{$k};
888 } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
889 # compare dump components alphabetically
890 $res = $a->{$k} cmp $b->{$k};
# invert a nonzero result when the key was prefixed with '-'
892 $res = -$res if ($rev eq '-' and $res);
# add_part: record a part in the logfile-based catalog.  A PART line is
# appended to the logfile matching the part's write_timestamp and label
# (a new logfile is created when none matches), a tapelist entry is added
# when the label/timestamp pair is not already listed, and a DONE line is
# written when the part's partnum equals its nparts.
# The part hashref is referred to as $dump throughout the body.
899 # caches for add_part() to avoid repeatedly looking up the log
900 # filename for a particular write_timestamp.
901 my $add_part_last_label = undef;
902 my $add_part_last_write_timestamp = undef;
903 my $add_part_last_logfile = undef;
911 my $logdir = getconf($CNF_LOGDIR);
912 my ($last_filenum, $last_secs, $last_kbs);
914 # first order of business is to find out whether we need to make a new
916 my $write_timestamp = zeropad($dump->{'write_timestamp'});
917 die "dump has no 'write_timestamp'" unless defined $write_timestamp;
919 # consult our one-element cache for this label and write_timestamp
920 if (!defined $add_part_last_label
921 or $add_part_last_label ne $dump->{'label'}
922 or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
# cache miss: forget the remembered logfile and search for one again
925 $add_part_last_logfile = undef;
927 for my $lf (Amanda::Logfile::find_log()) {
928 next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
929 next unless (zeropad($log_timestamp) eq $write_timestamp);
931 # write timestamp matches; now check the label
933 for $find_result (Amanda::Logfile::search_logfile(undef, undef,
935 next unless (defined $find_result->{'label'});
# a logfile qualifies once any of its results carries the same label
937 if ($find_result->{'label'} eq $dump->{'label'}) {
938 $add_part_last_label = $dump->{'label'};
939 $add_part_last_write_timestamp = $dump->{'write_timestamp'};
940 $add_part_last_logfile = $lf;
946 $logfile = $add_part_last_logfile;
948 # truncate the write_timestamp if we're not using timestamps
949 if (!getconf($CNF_USETIMESTAMPS)) {
950 $write_timestamp = substr($write_timestamp, 0, 8);
953 # get the information on the last dump and part in this logfile, or create
954 # a new logfile if none exists, then open the logfile for writing.
955 if (defined $logfile) {
958 # NOTE: this depends on an implementation detail of search_logfile: it
959 # returns the results in the reverse order of appearance in the logfile.
960 # Since we're concerned with the last elements of this logfile that we
961 # will be appending to shortly, we simply reverse this list. As this
962 # package is rewritten to parse logfiles on its own (or access a relational
963 # database), this implementation detail will no longer be relevant.
964 my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
965 "$logdir/$logfile", 1);
966 for $find_result (@find_results) {
967 # filter out the non-dump error messages that find.c produces
968 next unless (defined $find_result->{'label'});
# after the loop this holds the filenum of the final result in the logfile
970 $last_filenum = $find_result->{'filenum'};
972 # if this is part number 1, reset our secs and kbs counters on the
973 # assumption that this is the beginning of a new dump
974 if ($find_result->{'partnum'} == 1) {
975 $last_secs = $last_kbs = 0;
977 $last_secs += $find_result->{'sec'};
978 $last_kbs += $find_result->{'kb'};
# existing logfile: open in append mode
981 open($logfh, ">>", "$logdir/$logfile");
987 # pick an unused log filename
990 $logfile = "log.$write_timestamp.$i";
991 last unless -f "$logdir/$logfile";
# new logfile: create it, failing loudly if it cannot be written
995 open($logfh, ">", "$logdir/$logfile")
996 or die("Could not write '$logdir/$logfile': $!");
999 "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1002 "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1004 if (!defined $tapelist) {
1007 # reload the tapelist immediately, in case it's been modified
1008 $tapelist->reload();
1011 # see if we need to add an entry to the tapelist for this dump
# an entry counts as present only when both the label and the zero-padded
# datestamp match
1012 if (!grep { $_->{'label'} eq $dump->{'label'}
1013 and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1014 } @{$tapelist->{tles}}) {
1015 $tapelist->reload(1);
1016 $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
# warn (but carry on) when the new filenum does not directly follow the
# last one seen in the logfile
1021 if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1022 warn "Discontinuity in filenums in $logfile: " .
1023 "from $last_filenum to $dump->{filenum}";
# kb per second for this part; 0.0 when 'sec' is false, which avoids a
# division by zero
1026 my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
1028 my $part_line = "PART taper ";
1029 $part_line .= "$dump->{label} ";
1030 $part_line .= "$dump->{filenum} ";
1031 $part_line .= quote_string($dump->{hostname}) . " ";
1032 $part_line .= quote_string($dump->{diskname}) . " ";
1033 $part_line .= "$dump->{dump_timestamp} ";
1034 $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1035 $part_line .= "$dump->{level} ";
1036 $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1037 print $logfh "$part_line\n";
1039 # TODO: we don't always know nparts when writing a part, so
1040 # this is not always an effective way to detect a complete dump.
1041 # However, it works for purposes of data vaulting.
1042 if ($dump->{'partnum'} == $dump->{'nparts'}) {
# totals combine the running counters from earlier parts in the logfile
# with this part's own figures
1043 my $secs = $last_secs + $dump->{'sec'};
1044 my $kbs = $last_kbs + $dump->{'kb'};
1045 $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
1047 my $done_line = "DONE taper ";
1048 $done_line .= quote_string($dump->{hostname}) ." ";
1049 $done_line .= quote_string($dump->{diskname}) ." ";
1050 $done_line .= "$dump->{dump_timestamp} ";
1051 $done_line .= "$dump->{nparts} ";
1052 $done_line .= "$dump->{level} ";
1053 $done_line .= "[sec $secs kb $kbs kps $kps]";
1054 print $logfh "$done_line\n";
# Create the module-level Amanda::Tapelist object on first use.  The
# tapelist path comes from the configuration ($CNF_TAPELIST), resolved with
# config_dir_relative().  Nothing is done when $tapelist is already defined.
1060 sub _load_tapelist {
1061 if (!defined $tapelist) {
1062 my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
1063 $tapelist = Amanda::Tapelist->new($tapelist_filename);
1067 sub _clear_cache { # (used by installcheck)