1 # Copyright (c) 2006 Zmanda Inc. All Rights Reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License version 2 as published
5 # by the Free Software Foundation.
7 # This program is distributed in the hope that it will be useful, but
8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
9 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # You should have received a copy of the GNU General Public License along
13 # with this program; if not, write to the Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 # Contact information: Zmanda Inc, 505 N Mathilda Ave, Suite 120
17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
19 package Amanda::DB::Catalog;
23 Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
27 use Amanda::DB::Catalog;
29 # get all write timestamps on record
30 my @timestamps = Amanda::DB::Catalog::get_write_timestamps();
32 # loop over those timestamps, printing dump info for each one
33 for my $timestamp (@timestamps) {
34 my @dumpfiles = Amanda::DB::Catalog::get_dumps(
35 write_timestamp => $timestamp,
39 for my $dumpfile (@dumpfiles) {
40 print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
41 " level ", $dumpfile->{level}, "\n";
49 The Amanda catalog is a set of dumpfiles, where each dumpfile corresponds to a
50 single file in a storage volume. On tapes, files are separated by filemarks
51 and numbered sequentially. This model is preserved on non-tape media such as
52 the VFS and S3 devices. A dumpfile, then, is completely specified by a volume
53 label and a file number (I<filenum>).
55 The catalog is presented as a single table containing one row per dumpfile.
56 Each row has the following values:
62 (string) -- volume label
66 (integer) -- file on that volume
70 (string) -- timestamp of the run in which the dump was created
74 (string) -- timestamp of the run in which the dump was written to this volume
78 (string) -- dump hostname
82 (string) -- dump diskname
86 (integer) -- dump level
90 (string) -- "OK", "PARTIAL" or some other descriptor
94 (integer) -- part number of a split dump (1-based)
98 (integer) -- number of parts in this dump (estimated)
102 (integer) -- size (in kb) of this dumpfile
106 (integer) -- time (in seconds) spent writing this dumpfile
110 A dumpfile is represented as a hashref with these keys.
112 The label and filenum serve as a primary key. The dump_timestamp, hostname,
113 diskname, and level uniquely identify the dump. The write_timestamp gives the
114 time that the dump was written to this volume. The write_timestamp may differ
115 from the dump_timestamp if, for example, I<amflush> wrote the dump to tape
116 after the initial dump. The remaining fields are informational.
120 A dumpfile may be a part of a larger (split) dump, or may be partial (due to
121 end of tape or some other error), so the contents of the catalog require some
122 interpretation in order to find a particular dump.
124 All timestamps used in this module are full-length, in the format
125 C<YYYYMMDDHHMMSS>. If the underlying data contains only datestamps, they are
126 zero-extended into timestamps: C<YYYYMMDD000000>. A dump_timestamp always
127 corresponds to the initiation of the I<original> dump run, while
128 write_timestamp gives the time the file was written to the volume. When
129 dumpfiles are migrated from volume to volume (e.g., by I<amflush>), the
130 dump_timestamp does not change.
132 In Amanda, the tuple (hostname, diskname, level, dump_timestamp) serves as a unique
133 identifier for a dump. Since all of this information is preserved during
134 migrations, a catalog query with these four terms will return all dumpfiles
135 relevant to that dump.
139 This API is mostly read-only: C<add_dump> is the only function that modifies the catalog. The following functions are available:
143 =item get_write_timestamps()
145 Get a list of all write timestamps, sorted in chronological order.
147 =item get_latest_write_timestamp()
149 Return the most recent write timestamp.
151 =item get_labels_written_at_timestamp($ts)
153 Return a list of labels for volumes written at the given timestamp.
155 =item get_dumps(%parameters)
157 This function is the workhorse query interface, and returns a sequence of
158 dumpfiles. Values in C<%parameters> restrict the set of dumpfiles that are
159 returned. The hash can have any of the following keys:
163 =item write_timestamp
165 restrict to dumpfiles written at this timestamp
167 =item write_timestamps
169 (arrayref) restrict to dumpfiles written at any of these timestamps
173 restrict to dumpfiles with exactly this timestamp
175 =item dump_timestamps
177 (arrayref) restrict to dumpfiles with any of these timestamps
179 =item dump_timestamp_match
181 restrict to dumpfiles with timestamps matching this expression
185 restrict to dumpfiles with exactly this hostname
189 (arrayref) restrict to dumpfiles with any of these hostnames
193 restrict to dumpfiles with hostnames matching this expression
197 restrict to dumpfiles with exactly this diskname
201 (arrayref) restrict to dumpfiles with any of these disknames
205 restrict to dumpfiles with disknames matching this expression
209 restrict to dumpfiles with exactly this label
213 (arrayref) restrict to dumpfiles with any of these labels
217 restrict to dumpfiles with exactly this level
221 (arrayref) restrict to dumpfiles with any of these levels
225 restrict to dumpfiles with this status
229 Match expressions are described in the amanda(8) manual page.
231 =item sort_dumps([ $key1, $key2, .. ], @dumps)
233 Given a list of dumps, this function sorts that list by the requested keys.
234 The following keys are available:
242 =item write_timestamp
260 Keys are processed from left to right: if two dumps have the same value for
261 C<$key1>, then C<$key2> is examined, and so on. Key names may be prefixed by
262 "C<->" to reverse the order.
264 =item add_dump($dumpfile)
266 Add the given dumpfile to the database. In terms of logfiles, this will either
267 create a new logfile (if the dump's C<write_timestamp> has not been seen
268 before) or append to an existing logfile. Note that a new logfile will require
269 a corresponding new entry in the tapelist.
271 Note that no locking is performed: multiple simultaneous calls to this function
272 can result in a corrupted or incorrect logfile.
278 New summary functions may be added to reduce code duplication in other parts of
281 Support for loading and modifying the tapelist may eventually be folded into
287 use Amanda::Tapelist;
288 use Amanda::Config qw( :init :getconf config_dir_relative );
289 use Amanda::Util qw( quote_string );
# Module-level tapelist state: the loaded tapelist object and the filename it
# was read from.  Both start out undefined and are reset by _clear_cache().
294 my $tapelist = undef;
295 my $tapelist_filename = undef;
# Zero-extend a date-only stamp: an 8-character value (YYYYMMDD) gets
# "000000" appended so that it has the full YYYYMMDDHHMMSS form.
# NOTE(review): the handling of values of any other length is not visible
# here -- confirm that they are passed through unchanged.
299 my ($timestamp) = @_;
300 if (length($timestamp) == 8) {
301 return $timestamp."000000";
# Collect the write timestamp encoded in the name of every logfile reported
# by Amanda::Logfile::find_log(), zero-padded to full length by zeropad().
306 sub get_write_timestamps {
309 # find_log assumes that the tapelist has been loaded, so load it now
# Accepted names are log.<digits>, optionally followed by .<digits> or
# .amflush; any other name is skipped.
312 for (Amanda::Logfile::find_log()) {
313 next unless (my ($timestamp) = /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
314 push @rv, zeropad($timestamp);
# Return the last element of the list produced by get_write_timestamps().
# NOTE(review): this is the most recent timestamp only if that list is in
# chronological order, as the module documentation states -- confirm.
320 sub get_latest_write_timestamp {
321 # get all of the timestamps and select the last one
322 my @timestamps = get_write_timestamps();
325 return $timestamps[-1];
# Query the catalog: gather dumpfile records from the Amanda logfiles and
# keep those that satisfy every restriction given in %params.  Each match
# is collected in @results as a hashref with the keys write_timestamp,
# dump_timestamp, hostname, diskname, level, label, filenum, status, sec,
# kb, partnum and nparts.
#
# %params keys consulted by this code: write_timestamp(s), dump_timestamp(s),
# hostname(s), diskname(s), level(s), label(s), status, partnum, nparts, and
# the hostname_match, diskname_match and dump_timestamp_match expressions.
333 my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
335 # find_log assumes that the tapelist has been loaded, so load it now
# Only the singular timestamp values are zero-padded below.
# NOTE(review): entries supplied directly in the plural timestamp lists are
# later compared against zero-padded values, so 8-digit datestamps there
# would presumably never match -- confirm whether callers must pad them.
338 # pre-process params by appending all of the "singular" parameters to the "plurals"
339 push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
340 if exists($params{'write_timestamp'});
341 push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
342 if exists($params{'dump_timestamp'});
343 push @{$params{'hostnames'}}, $params{'hostname'}
344 if exists($params{'hostname'});
345 push @{$params{'disknames'}}, $params{'diskname'}
346 if exists($params{'diskname'});
347 push @{$params{'levels'}}, $params{'level'}
348 if exists($params{'level'});
349 push @{$params{'labels'}}, $params{'label'}
350 if exists($params{'label'});
352 # Since we're working from logfiles, we have to pick the logfiles we'll use first.
353 # Then we can use search_logfile.
355 if (exists($params{'write_timestamps'})) {
356 # if we have specific write_timestamps, the job is pretty easy.
# keep only logfiles whose (zero-padded) name timestamp was requested
357 my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
358 for my $logfile (Amanda::Logfile::find_log()) {
359 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
360 next unless (exists($timestamps_hash{zeropad($timestamp)}));
361 push @logfiles, $logfile;
363 } elsif (exists($params{'dump_timestamps'})) {
364 # otherwise, we need only look in logfiles at or after the earliest dump timestamp
# timestamps are compared as strings (sort / ge)
365 my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
366 my $earliest_timestamp = $sorted_timestamps[0];
367 for my $logfile (Amanda::Logfile::find_log()) {
368 next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
369 next unless (zeropad($timestamp) ge $earliest_timestamp);
370 push @logfiles, $logfile;
373 # oh well -- it looks like we'll have to read all existing logfiles.
374 @logfiles = Amanda::Logfile::find_log();
377 # Set up some hash tables for speedy lookups of various attributes
# (only the keys matter; a hash left empty means "no restriction")
378 my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
379 %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
380 if (exists($params{'dump_timestamps'}));
381 %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
382 if (exists($params{'hostnames'}));
383 %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
384 if (exists($params{'disknames'}));
385 %levels_hash = map { ($_, undef) } @{$params{'levels'}}
386 if (exists($params{'levels'}));
387 %labels_hash = map { ($_, undef) } @{$params{'labels'}}
388 if (exists($params{'labels'}));
390 # now loop over those logfiles and use search_logfile to load the dumpfiles
391 # from them, then process each entry from the logfile
393 for my $logfile (@logfiles) {
394 # get the raw contents from search_logfile
395 my @find_results = Amanda::Logfile::search_logfile(undef, undef,
396 "$logfile_dir/$logfile", 1);
398 # filter against *_match with dumps_match
# (a match expression that was not supplied is passed as undef)
399 @find_results = Amanda::Logfile::dumps_match([@find_results],
400 exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
401 exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
402 exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
406 # convert to dumpfile hashes, including the write_timestamp from the logfile name
407 my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
408 my $write_timestamp = zeropad($timestamp);
410 # loop over each entry in the logfile.
411 for my $find_result (@find_results) {
413 # filter out the non-dump error messages that find.c produces
414 next unless (defined $find_result->{'label'});
416 # bail out on this result early, if possible
# each test applies only when its lookup hash is non-empty
417 next if (%dump_timestamps_hash
418 and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
419 next if (%hostnames_hash
420 and !exists($hostnames_hash{$find_result->{'hostname'}}));
421 next if (%disknames_hash
422 and !exists($disknames_hash{$find_result->{'diskname'}}));
423 next if (%levels_hash
424 and !exists($levels_hash{$find_result->{'level'}}));
425 next if (%labels_hash
426 and !exists($labels_hash{$find_result->{'label'}}));
427 next if (exists($params{'status'})
428 and $find_result->{'status'} ne $params{'status'});
430 # start setting up a dumpfile hash for this result
432 'write_timestamp' => $write_timestamp,
433 'dump_timestamp' => zeropad($find_result->{'timestamp'}),
434 'hostname' => $find_result->{'hostname'},
435 'diskname' => $find_result->{'diskname'},
436 'level' => $find_result->{'level'},
437 'label' => $find_result->{'label'},
438 'filenum' => $find_result->{'filenum'},
439 'status' => $find_result->{'status'},
440 'sec' => $find_result->{'sec'},
441 'kb' => $find_result->{'kb'},
444 # partnum and nparts take some special interpretation
# a "N/M" string is split into numeric partnum N and nparts M (M may be
# negative); the "+0" forces numeric values
445 if (my ($partnum, $nparts) = $find_result->{'partnum'} =~ m$(\d+)/(-?\d+)$) {
446 $dumpfile{'partnum'} = $partnum+0;
447 $dumpfile{'nparts'} = $nparts+0;
# fallback values: a single part out of one
449 $dumpfile{'partnum'} = 1;
450 $dumpfile{'nparts'} = 1;
453 # check partnum and nparts
# (applied only when the parameter is defined; compared numerically)
454 next if (defined($params{'partnum'}) and $dumpfile{'partnum'} != $params{'partnum'});
455 next if (defined($params{'nparts'}) and $dumpfile{'nparts'} != $params{'nparts'});
457 push @results, \%dumpfile;
# sort_dumps([ $key1, $key2, .. ], @dumps)
#
# Sort a list of dumpfile hashrefs by one or more dumpfile keys.
#
# Parameters:
#   $keys  - arrayref of key names, most significant first.  A name prefixed
#            with "-" sorts on that key in descending order.
#   @dumps - the dumpfile hashrefs to sort.
#
# Returns the dumps as a sorted list.  Later keys are consulted only when all
# earlier keys compare equal; with no keys, every pair compares equal.
#
# The integer-valued dumpfile fields are compared numerically so that, for
# example, filenum 10 sorts after filenum 2; all other keys are compared as
# strings.
sub sort_dumps {
    my ($keys, @dumps) = @_;

    # dumpfile fields that hold numbers rather than strings
    my %is_numeric = map { ($_ => 1) } qw(filenum level partnum nparts kb sec);

    # Decode every key once, up front, instead of on each comparison:
    # [ field name, compare numerically?, +1 ascending / -1 descending ]
    my @criteria;
    for my $key (@$keys) {
        my ($name, $direction) = ($key, 1);
        if ($key =~ /^-(.*)$/) {
            ($name, $direction) = ($1, -1);
        }
        push @criteria, [ $name, $is_numeric{$name} ? 1 : 0, $direction ];
    }

    return sort {
        my $r = 0;
        for my $criterion (@criteria) {
            my ($name, $numeric, $direction) = @$criterion;
            $r = $numeric
                ? ($a->{$name} <=> $b->{$name})
                : ($a->{$name} cmp $b->{$name});
            $r *= $direction;

            # the first key that tells the two dumps apart decides
            last if $r;
        }
        $r;
    } @dumps;
}
481 # caches for add_dump() to avoid repeatedly looking up the log
482 # filename for a particular write_timestamp.
# Together these form a one-element cache: the label and write_timestamp of
# the last lookup and the logfile that was found for them.
483 my $add_dump_last_label = undef;
484 my $add_dump_last_write_timestamp = undef;
485 my $add_dump_last_logfile = undef;
# add_dump($dump): record one dumpfile in the logfile matching its
# write_timestamp and label, creating a new logfile (and, if needed, a
# tapelist entry) when no existing logfile matches.  Writes a PART line and,
# when partnum equals nparts, a DONE line carrying accumulated sec/kb totals.
# Dies if the dump has no write_timestamp or a new logfile cannot be opened.
# NOTE(review): the log directory is used as returned by getconf() here,
# whereas get_dumps() passes the same setting through config_dir_relative()
# -- confirm which is intended.
493 my $logdir = getconf($CNF_LOGDIR);
494 my ($last_filenum, $last_secs, $last_kbs);
496 # first order of business is to find out whether we need to make a new logfile
498 my $write_timestamp = zeropad($dump->{'write_timestamp'});
499 die "dump has no 'write_timestamp'" unless defined $write_timestamp;
501 # consult our one-element cache for this label and write_timestamp
# (the cache is keyed on the timestamp exactly as supplied, not zero-padded)
502 if (!defined $add_dump_last_label
503 or $add_dump_last_label ne $dump->{'label'}
504 or $add_dump_last_write_timestamp ne $dump->{'write_timestamp'}) {
# cache miss: scan the logfiles for one with this timestamp and label
507 $add_dump_last_logfile = undef;
509 for my $lf (Amanda::Logfile::find_log()) {
510 next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
511 next unless (zeropad($log_timestamp) eq $write_timestamp);
513 # write timestamp matches; now check the label
515 for $find_result (Amanda::Logfile::search_logfile(undef, undef,
517 next unless (defined $find_result->{'label'});
519 if ($find_result->{'label'} eq $dump->{'label'}) {
520 $add_dump_last_label = $dump->{'label'};
521 $add_dump_last_write_timestamp = $dump->{'write_timestamp'};
522 $add_dump_last_logfile = $lf;
# undefined when no existing logfile matched
528 $logfile = $add_dump_last_logfile;
530 # truncate the write_timestamp if we're not using timestamps
531 if (!getconf($CNF_USETIMESTAMPS)) {
532 $write_timestamp = substr($write_timestamp, 0, 8);
535 # get the information on the last dump and part in this logfile, or create
536 # a new logfile if none exists, then open the logfile for writing.
537 if (defined $logfile) {
540 # NOTE: this depends on an implementation detail of search_logfile: it
541 # returns the results in the reverse order of appearance in the logfile.
542 # Since we're concerned with the last elements of this logfile that we
543 # will be appending to shortly, we simply reverse this list. As this
544 # package is rewritten to parse logfiles on its own (or access a relational
545 # database), this implementation detail will no longer be relevant.
546 my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
547 "$logdir/$logfile", 1);
548 for $find_result (@find_results) {
549 # filter out the non-dump error messages that find.c produces
550 next unless (defined $find_result->{'label'});
552 $last_filenum = $find_result->{'filenum'};
554 # if this is part number 1, reset our secs and kbs counters on the
555 # assumption that this is the beginning of a new dump
# NOTE(review): the pattern is unanchored, so it also matches part numbers
# that merely end in 1 (e.g. "11/12"), and it does not match a negative part
# count such as "1/-1" -- confirm whether an anchored test is wanted.
556 if ($find_result->{'partnum'} =~ qr{1/\d}) {
557 $last_secs = $last_kbs = 0;
559 $last_secs += $find_result->{'sec'};
560 $last_kbs += $find_result->{'kb'};
# NOTE(review): unlike the open() for a new logfile below, the result of this
# append-mode open() is not checked.
563 open($logfh, ">>", "$logdir/$logfile");
569 # pick an unused log filename
572 $logfile = "log.$write_timestamp.$i";
573 last unless -f "$logdir/$logfile";
577 open($logfh, ">", "$logdir/$logfile")
578 or die("Could not write '$logdir/$logfile': $!");
581 "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
584 "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
586 if (!defined $tapelist_filename) {
587 $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
590 # reload the tapelist immediately, in case it's been modified
591 $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
593 # see if we need to add an entry to the tapelist for this dump
# (an entry matches on label plus zero-padded datestamp)
594 if (!grep { $_->{'label'} eq $dump->{'label'}
595 and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
597 $tapelist->add_tapelabel($write_timestamp, $dump->{'label'});
598 $tapelist->write($tapelist_filename);
# warn, but carry on, when the new filenum does not directly follow the last
# one seen in the logfile
602 if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
603 warn "Discontinuity in filenums in $logfile: " .
604 "from $last_filenum to $dump->{filenum}";
# kb per second for this part; 0.0 when sec is zero or missing, which avoids
# a division by zero
607 my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
609 my $part_line = "PART taper ";
610 $part_line .= "$dump->{label} ";
611 $part_line .= "$dump->{filenum} ";
612 $part_line .= quote_string($dump->{hostname}) . " ";
613 $part_line .= quote_string($dump->{diskname}) . " ";
614 $part_line .= "$dump->{dump_timestamp} ";
615 $part_line .= "$dump->{partnum}/$dump->{nparts} ";
616 $part_line .= "$dump->{level} ";
617 $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
618 print $logfh "$part_line\n";
620 # TODO: we don't always know nparts when writing a part, so
621 # this is not always an effective way to detect a complete dump.
622 # However, it works for purposes of data vaulting.
623 if ($dump->{'partnum'} == $dump->{'nparts'}) {
# totals: the running counters from the existing logfile plus this part
624 my $secs = $last_secs + $dump->{'sec'};
625 my $kbs = $last_kbs + $dump->{'kb'};
626 $kps = $secs? ($kbs + 0.0) / $secs : 0.0;
628 my $done_line = "DONE taper ";
629 $done_line .= quote_string($dump->{hostname}) ." ";
630 $done_line .= quote_string($dump->{diskname}) ." ";
631 $done_line .= "$dump->{dump_timestamp} ";
632 $done_line .= "$dump->{nparts} ";
633 $done_line .= "$dump->{level} ";
634 $done_line .= "[sec $secs kb $kbs kps $kps]";
635 print $logfh "$done_line\n";
# Load the tapelist on first use: when none is cached yet, pass the
# configured tapelist setting through config_dir_relative() and read the
# tapelist from the resulting filename.  Both values are kept at file scope.
642 if (!defined $tapelist) {
643 $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
644 $tapelist = Amanda::Tapelist::read_tapelist($tapelist_filename);
# Forget the cached tapelist and its filename, so that the "not yet loaded"
# check on $tapelist succeeds again and the tapelist is re-read.
648 sub _clear_cache { # (used by installcheck)
649 $tapelist = $tapelist_filename = undef;