Imported Upstream version 3.1.0

[debian/amanda] / perl / Amanda / DB / Catalog.pm
diff --git a/perl/Amanda/DB/Catalog.pm b/perl/Amanda/DB/Catalog.pm

index 8e457ea1c133b433f785655489a5896c0d03e28d..fe1a399df24907ba303870ed2be840b375e9ed40 100644 (file)
--- a/perl/Amanda/DB/Catalog.pm
+++ b/perl/Amanda/DB/Catalog.pm
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 Zmanda Inc.  All Rights Reserved.
+# Copyright (c) 2008, 2009, 2010 Zmanda, Inc.  All Rights Reserved.
  #
  # This program is free software; you can redistribute it and/or modify it
  # under the terms of the GNU General Public License version 2 as published
@@ -31,7 +31,7 @@ Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
  
    # loop over those timestamps, printing dump info for each one
    for my $timestamp (@timestamps) {
-      my @dumpfiles = Amanda::DB::Catalog::get_dumps(
+      my @dumpfiles = Amanda::DB::Catalog::get_parts(
           timestamp => $timestamp,
           ok => 1
        );
@@ -42,28 +42,29 @@ Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
        }
    }
  
-=head1 DESCRIPTION
+=head1 MODEL
  
-=head2 MODEL
+The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
+a complete bytestream received from an application, and is uniquely identified
+by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
+and C<write_timestamp>.  A dump may be partial, or even a complete failure.
  
-The Amanda catalog is a set of dumpfiles, where each dumpfile corresponds to a
-single file in a storage volume.  On tapes, files are separated by filemarks
-and numbered sequentially.  This model is preserved on non-tape media such as
-the VFS and S3 devices.  A dumpfile, then, is completely specified by a volume
-label and a file number (I<filenum>).
+A part corresponds to a single file on a volume, containing a portion of the
+data for a dump.  A part, then, is completely specified by a volume label and a
+file number (C<filenum>).  Each part has, among other things, a part number
+(C<partnum>) which gives its relative position within the dump.  The bytestream
+for a dump is recovered by concatenating all of the successful (C<status> = OK)
+parts matching the dump.
  
-The catalog is presented as a single table containing one row per dumpfile.
-Each row has the following values:
+Files in the holding disk are considered part of the catalog, and are
+represented as single-part dumps (holding-disk chunking is ignored, as it is
+distinct from split parts).
  
-=over
-
-=item label
+=head2 DUMPS
  
-(string) -- volume label
-
-=item filenum
+The dump table contains one row per dump.  It has the following columns:
  
-(integer) -- file on that volume
+=over
  
  =item dump_timestamp
  
@@ -71,7 +72,8 @@ Each row has the following values:
  
  =item write_timestamp
  
-(string) -- timestamp of the run in which the dump was written to this volume
+(string) -- timestamp of the run in which the part was written to this volume,
+or C<"00000000000000"> for dumps in the holding disk.
  
  =item hostname
  
@@ -87,56 +89,114 @@ Each row has the following values:
  
  =item status
  
-(string) -- "OK", "PARTIAL" or some other descriptor
+(string) -- "OK", "PARTIAL", or "FAIL"
  
-=item partnum
+=item message
  
-(integer) -- part number of a split dump (1-based)
+(string) -- reason for PARTIAL or FAIL status
  
  =item nparts
  
-(integer) -- number of parts in this dump (estimated)
+(integer) -- number of successful parts in this dump
  
  =item kb
  
-(integer) -- size (in kb) of this dumpfile
+(integer) -- size (in kb) of this part
+
+=item orig_kb
+
+(integer) -- size (in kb) of the complete dump (uncompress and uncrypted).
  
  =item sec
  
-(integer) -- time (in seconds) spent writing this dumpfile
+(integer) -- time (in seconds) spent writing this part
+
+=item parts
+
+(arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
+always C<undef>).  When multiple partial parts are available, the choice of the
+partial that is included in this array is undefined.
  
  =back
  
-A dumpfile is represented as a hashref with these keys.
+A dump is represented as a hashref with these keys.
  
-The label and filenum serve as a primary key.  The dump_timestamp, hostname,
-diskname, and level uniquely identify the dump.  The write_timestamp gives the
-time that the dump was written to this volume.  The write_timestamp may differ
-from the dump_timestamp if, for example, I<amflush> wrote the dump to tape
-after the initial dump.  The remaining fields are informational.
+The C<write_timestamp> gives the time of the amanda run in which the part was
+written to this volume.  The C<write_timestamp> may differ from the
+C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
+initial dump.
  
-=head2 NOTES
+=head2 PARTS
+
+The parts table contains one row per part, and has the following columns:
+
+=over
+
+=item label
  
-A dumpfile may be a part of a larger (split) dump, or may be partial (due to
-end of tape or some other error), so the contents of the catalog require some
-interpretation in order to find a particular dump.
+(string) -- volume label (not present for holding files)
+
+=item filenum
+
+(integer) -- file on that volume (not present for holding files)
+
+=item holding_file
+
+(string) -- fully-qualified pathname of the holding file (not present for
+on-media dumps)
+
+=item dump
+
+(object ref) -- a reference to the dump containing this part
+
+=item status
+
+(string) -- "OK", "PARTIAL" or some other descriptor
+
+=item partnum
+
+(integer) -- part number of a split part (1-based)
+
+=item kb
+
+(integer) -- size (in kb) of this part
+
+=item sec
+
+(integer) -- time (in seconds) spent writing this part
+
+=back
+
+A part is represented as a hashref with these keys.  The C<label> and
+C<filenum> serve as a primary key. 
+
+Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
+broken by making the C<parts> array's contents weak references in C<get_dumps>,
+and the C<dump> reference weak in C<get_parts>.
+
+=head2 NOTES
  
  All timestamps used in this module are full-length, in the format
  C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
-zero-extended into timestamps: C<YYYYMMDD000000>.  A dump_timestamp always
+zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
  corresponds to the initiation of the I<original> dump run, while
-write_timestamp gives the time the file was written to the volume.  When
-dumpfiles are migrated from volume to volume (e.g., by I<amflush>), the
-dump_timestamp does not change.  
+C<write_timestamp> gives the time the file was written to the volume.  When
+parts are migrated from volume to volume (e.g., by I<amvault>), the
+C<dump_timestamp> does not change.  
+
+In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
+serves as a unique identifier for a dump bytestream, but because the bytestream
+may appear several times in the catalog (due to vaulting) the additional
+C<write_timestamp> is required to identify a particular on-storage instance of
+a dump.  Note that the part sizes may differ between instances, so it is not
+valid to concatenate parts from different dump instances.
  
-In Amanda, the tuple (hostname, diskname, level, dump_timestamp) serves as a unique
-identifier for a dump.  Since all of this information is preserved during
-migrations, a catalog query with these four terms will return all dumpfiles
-relevant to that dump.
+=head1 INTERFACES
  
-=head2 QUERIES
+=head2 SUMMARY DATA
  
-This API is read-only at the moment.  The following functions are available:
+The following functions provide summary data based on the contents of the
+catalog.
  
  =over
  
@@ -152,85 +212,102 @@ Return the most recent write timestamp.
  
  Return a list of labels for volumes written at the given timestamp.
  
-=item get_dumps(%parameters)
+=back
  
-This function is the workhorse query interface, and returns a sequence of
-dumpfiles.  Values in C<%parameters> restrict the set of dumpfiles that are
-returned.  The hash can have any of the following keys:
+=head2 PARTS
+
+=over
+
+=item get_parts(%parameters)
+
+This function returns a sequence of parts.  Values in C<%parameters> restrict
+the set of parts that are returned.  The hash can have any of the following
+keys:
  
  =over
  
  =item write_timestamp
  
-restrict to dumpfiles written at this timestamp
+restrict to parts written at this timestamp
  
  =item write_timestamps
  
-(arrayref) restrict to dumpfiles written at any of these timestamps
+(arrayref) restrict to parts written at any of these timestamps (note that
+holding-disk files have no C<write_timestamp>, so this option and the previous
+will omit them)
  
  =item dump_timestamp
  
-restrict to dumpfiles with exactly this timestamp
+restrict to parts with exactly this timestamp
  
  =item dump_timestamps
  
-(arrayref) restrict to dumpfiles with any of these timestamps
+(arrayref) restrict to parts with any of these timestamps
  
  =item dump_timestamp_match
  
-restrict to dumpfiles with timestamps matching this expression
+restrict to parts with timestamps matching this expression
+
+=item holding
+
+if true, only return dumps on holding disk.  If false, omit dumps on holding
+disk.
  
  =item hostname
  
-restrict to dumpfiles with exactly this hostname
+restrict to parts with exactly this hostname
  
  =item hostnames
  
-(arrayref) restrict to dumpfiles with any of these hostnames
+(arrayref) restrict to parts with any of these hostnames
  
  =item hostname_match
  
-restrict to dumpfiles with hostnames matching this expression
+restrict to parts with hostnames matching this expression
  
  =item diskname
  
-restrict to dumpfiles with exactly this diskname
+restrict to parts with exactly this diskname
  
  =item disknames
  
-(arrayref) restrict to dumpfiles with any of these disknames
+(arrayref) restrict to parts with any of these disknames
  
  =item diskname_match
  
-restrict to dumpfiles with disknames matching this expression
+restrict to parts with disknames matching this expression
  
  =item label
  
-restrict to dumpfiles with exactly this label
+restrict to parts with exactly this label
  
  =item labels
  
-(arrayref) restrict to dumpfiles with any of these labels
+(arrayref) restrict to parts with any of these labels
  
  =item level
  
-restrict to dumpfiles with exactly this level
+restrict to parts with exactly this level
  
  =item levels
  
-(arrayref) restrict to dumpfiles with any of these levels
+(arrayref) restrict to parts with any of these levels
  
  =item status
  
-restrict to dumpfiles with this status
+restrict to parts with this status
+
+=item dumpspecs
+
+(arrayref of dumpspecs) restruct to parts matching one or more of these dumpspecs
  
  =back
  
  Match expressions are described in the amanda(8) manual page.
  
-=item sort_dumps([ $key1, $key2, .. ], @dumps)
+=item sort_parts([ $key1, $key2, .. ], @parts)
  
-Given a list of dumps, this function sorts that list by the requested keys.
+Given a list of parts, this function sorts that list by the requested keys.
  The following keys are available:
  
  =over
@@ -249,44 +326,68 @@ The following keys are available:
  
  =item label
  
-=item partnum
+Note that this sorts labels I<lexically>, not necessarily in the order they were used!
  
-=item kb
+=item partnum
  
-=item sec
+=item nparts
  
  =back
  
  Keys are processed from left to right: if two dumps have the same value for
-C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by
-"C<->" to reverse the order.
+C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
+dash (C<->) to reverse the order.
+
+Note that some of these keys are dump keys; the function will automatically
+access those values via the C<dump> attribute.
+
+=back
  
-=item add_dump($dumpfile)
+=head2 DUMPS
  
-Add the given dumpfile to the database.  In terms of logfiles, this will either
-create a new logfile (if the dump's C<write_timestamp> has not been seen
+=over
+
+=item get_dumps(%parameters)
+
+This function returns a sequence of dumps.  Values in C<%parameters> restrict
+the set of dumps that are returned.  The same keys as are used for C<get_parts>
+are available here, with the exception of C<label> and C<labels>.  The
+C<status> key applies to the dump status, not the status of its constituent
+parts.
+
+=item sort_dumps([ $key1, $key2 ], @dumps)
+
+Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
+The same keys are available, with the exception of C<label>, C<filenum>, and
+C<partnum>.
+
+=back
+
+=head2 ADDING DATA
+
+=over
+
+=item add_part($part)
+
+Add the given part to the database.  In terms of logfiles, this will either
+create a new logfile (if the part's C<write_timestamp> has not been seen
  before) or append to an existing logfile.  Note that a new logfile will require
  a corresponding new entry in the tapelist.
  
  Note that no locking is performed: multiple simultaneous calls to this function
  can result in a corrupted or incorrect logfile.
  
-=back
-
-=head1 API STATUS
+TODO: add_dump
  
-New summary functions may be added to reduce code duplication in other parts of
-Amanda.
-
-Support for loading and modifying the tapelist may eventually be folded into
-this module.
+=back
  
  =cut
  
-use Amanda::Logfile;
+use Amanda::Logfile qw( :constants match_disk match_host
+                       match_datestamp match_level );
  use Amanda::Tapelist;
  use Amanda::Config qw( :init :getconf config_dir_relative );
-use Amanda::Util qw( quote_string );
+use Amanda::Util qw( quote_string weaken_ref );
  use warnings;
  use strict;
  
@@ -328,7 +429,11 @@ sub get_latest_write_timestamp {
      return undef;
  }
  
-sub get_dumps {
+# this generic function implements the loop of scanning logfiles to find
+# the requested data; get_parts and get_dumps then adjust the results to
+# match what the user expects.
+sub get_parts_and_dumps {
+    my $get_what = shift; # "parts" or "dumps"
      my %params = @_;
      my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
  
@@ -346,13 +451,27 @@ sub get_dumps {
         if exists($params{'diskname'});
      push @{$params{'levels'}}, $params{'level'} 
         if exists($params{'level'});
-    push @{$params{'labels'}}, $params{'label'} 
-       if exists($params{'label'});
+    if ($get_what eq 'parts') {
+       push @{$params{'labels'}}, $params{'label'} 
+           if exists($params{'label'});
+    } else {
+       delete $params{'labels'};
+    }
+
+    # specifying write_timestamps implies we won't check holding files
+    if ($params{'write_timestamps'}) {
+       if (defined $params{'holding'} and $params{'holding'}) {
+           return [], []; # well, that's easy..
+       }
+       $params{'holding'} = 0;
+    }
  
      # Since we're working from logfiles, we have to pick the logfiles we'll use first.
      # Then we can use search_logfile.
      my @logfiles;
-    if (exists($params{'write_timestamps'})) {
+    if ($params{'holding'}) {
+       @logfiles = ( 'holding', );
+    } elsif (exists($params{'write_timestamps'})) {
         # if we have specific write_timestamps, the job is pretty easy.
         my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
         for my $logfile (Amanda::Logfile::find_log()) {
@@ -387,13 +506,32 @@ sub get_dumps {
      %labels_hash = map { ($_, undef) } @{$params{'labels'}}
         if (exists($params{'labels'}));
  
+    my %dumps;
+    my @parts;
+
+    # *also* scan holding if the holding param wasn't specified
+    if (!exists $params{'holding'}) {
+       push @logfiles, 'holding';
+    }
+
      # now loop over those logfiles and use search_logfile to load the dumpfiles
      # from them, then process each entry from the logfile
-    my @results;
      for my $logfile (@logfiles) {
-       # get the raw contents from search_logfile
-       my @find_results = Amanda::Logfile::search_logfile(undef, undef,
-                                                   "$logfile_dir/$logfile", 1);
+       my (@find_results, $write_timestamp);
+
+       # get the raw contents from search_logfile, or use holding if
+       # $logfile is undef
+       if ($logfile ne 'holding') {
+           @find_results = Amanda::Logfile::search_logfile(undef, undef,
+                                                       "$logfile_dir/$logfile", 1);
+           # convert to dumpfile hashes, including the write_timestamp from the logfile name
+           my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
+           $write_timestamp = zeropad($timestamp);
+
+       } else {
+           @find_results = Amanda::Logfile::search_holding_disk();
+           $write_timestamp = '00000000000000';
+       }
  
         # filter against *_match with dumps_match
         @find_results = Amanda::Logfile::dumps_match([@find_results],
@@ -403,10 +541,6 @@ sub get_dumps {
             undef,
             0);
  
-       # convert to dumpfile hashes, including the write_timestamp from the logfile name
-       my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
-       my $write_timestamp = zeropad($timestamp);
-
         # loop over each entry in the logfile.
         for my $find_result (@find_results) {
  
@@ -424,67 +558,282 @@ sub get_dumps {
                 and !exists($levels_hash{$find_result->{'level'}}));
             next if (%labels_hash 
                 and !exists($labels_hash{$find_result->{'label'}}));
-           next if (exists($params{'status'}) 
-               and $find_result->{'status'} ne $params{'status'});
-
-           # start setting up a dumpfile hash for this result
-           my %dumpfile = (
-               'write_timestamp' => $write_timestamp,
-               'dump_timestamp' => zeropad($find_result->{'timestamp'}),
-               'hostname' => $find_result->{'hostname'},
-               'diskname' => $find_result->{'diskname'},
-               'level' => $find_result->{'level'},
-               'label' => $find_result->{'label'},
-               'filenum' => $find_result->{'filenum'},
-               'status' => $find_result->{'status'},
-               'sec' => $find_result->{'sec'},
-               'kb' => $find_result->{'kb'},
-           );
-
-           # partnum and nparts takes some special interpretation
-           if (my ($partnum, $nparts) = $find_result->{'partnum'} =~ m$(\d+)/(-?\d+)$) {
-               $dumpfile{'partnum'} = $partnum+0;
-               $dumpfile{'nparts'} = $nparts+0;
+           if ($get_what eq 'parts') {
+               next if (exists($params{'status'}) 
+                   and $find_result->{'status'} ne $params{'status'});
+           }
+
+           # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
+           # tendency to produce duplicate results
+           next if ($params{'dumpspecs'}
+               and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
+                                                   $params{'dumpspecs'}, 0));
+
+           my $dump_timestamp = zeropad($find_result->{'timestamp'});
+
+           my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
+                                    $write_timestamp, $find_result->{'level'});
+           my $dump = $dumps{$dumpkey};
+           if (!defined $dump) {
+               $dump = $dumps{$dumpkey} = {
+                   dump_timestamp => $dump_timestamp,
+                   write_timestamp => $write_timestamp,
+                   hostname => $find_result->{'hostname'},
+                   diskname => $find_result->{'diskname'},
+                   level => $find_result->{'level'}+0,
+                   orig_kb => $find_result->{'orig_kb'},
+                   status => $find_result->{'dump_status'},
+                   message => $find_result->{'message'},
+                   # the rest of these params are unknown until we see a taper
+                   # DONE, PARTIAL, or FAIL line, although we count nparts
+                   # manually instead of relying on the logfile
+                   nparts => 0,
+                   kb => -1,
+                   sec => -1,
+               };
+           }
+
+           # start setting up a part hash for this result
+           my %part;
+           if ($logfile ne 'holding') {
+               # on-media dump
+               %part = (
+                   label => $find_result->{'label'},
+                   filenum => $find_result->{'filenum'},
+                   dump => $dump,
+                   status => $find_result->{'status'},
+                   sec => $find_result->{'sec'},
+                   kb => $find_result->{'kb'},
+                   orig_kb => $find_result->{'orig_kb'},
+                   partnum => $find_result->{'partnum'},
+               );
+           } else {
+               # holding disk
+               %part = (
+                   holding_file => $find_result->{'label'},
+                   dump => $dump,
+                   status => $find_result->{'status'},
+                   sec => 0.0,
+                   kb => $find_result->{'kb'},
+                   orig_kb => $find_result->{'orig_kb'},
+                   partnum => 1,
+               );
+               # and fix up the dump, too
+               $dump->{'status'} = $find_result->{'status'};
+               $dump->{'kb'} = $find_result->{'kb'};
+               $dump->{'sec'} = $find_result->{'sec'};
+           }
+
+           # weaken the dump ref if we're returning dumps
+           weaken_ref($part{'dump'})
+               if ($get_what eq 'dumps');
+
+           # count the number of successful parts in the dump
+           $dump->{'nparts'}++ if $part{'status'} eq 'OK';
+           
+           # and add a ref to the array of parts; if we're getting
+           # parts, then this is a weak ref
+           $dump->{'parts'}[$part{'partnum'}] = \%part;
+           weaken_ref($dump->{'parts'}[$part{'partnum'}])
+               if ($get_what eq 'parts');
+
+           push @parts, \%part;
+       }
+
+       # if these dumps were on the holding disk, then we're done
+       next if $logfile eq 'holding';
+
+       # re-read the logfile to extract dump-level info that's not captured by
+       # search_logfile
+       my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
+       die "logfile '$logfile' not found" unless $logh;
+       while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
+           next unless $prog == $P_TAPER;
+           my $status;
+           if ($type == $L_DONE) {
+               $status = 'OK';
+           } elsif ($type == $L_PARTIAL) {
+               $status = 'PARTIAL';
+           } elsif ($type == $L_FAIL) {
+               $status = 'FAIL';
+           } else {
+               next;
+           }
+
+           # now extract the appropriate info; luckily these log lines have the same
+           # format, more or less
+           my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $message);
+           ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
+           ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
+           ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
+           if ($status ne 'FAIL') {
+               ($nparts, $str) = Amanda::Util::skip_quoted_string($str);
+           } else {
+               $nparts = 0;
+           }
+           ($level, $str) = Amanda::Util::skip_quoted_string($str);
+           if ($status ne 'FAIL') {
+               my $s = $str;
+               ($secs, $kb, $str) = ($str =~ /^\[sec ([0-9.]+) kb (\d+) .*\] ?(.*)$/)
+                   or die("'$s'");
+           }
+           if ($status ne 'OK') {
+               $message = $str;
             } else {
-               $dumpfile{'partnum'} = 1;
-               $dumpfile{'nparts'} = 1;
+               $message = '';
+           }
+
+           $hostname = Amanda::Util::unquote_string($hostname);
+           $diskname = Amanda::Util::unquote_string($diskname);
+           $message = Amanda::Util::unquote_string($message) if $message;
+
+           # filter against dump criteria
+           next if ($params{'dump_timestamp_match'}
+               and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
+           next if (%dump_timestamps_hash 
+               and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));
+
+           next if ($params{'hostname_match'}
+               and !match_host($params{'hostname_match'}, $hostname));
+           next if (%hostnames_hash 
+               and !exists($hostnames_hash{$hostname}));
+
+           next if ($params{'diskname_match'}
+               and !match_disk($params{'diskname_match'}, $diskname));
+           next if (%disknames_hash 
+               and !exists($disknames_hash{$diskname}));
+
+           next if (%levels_hash 
+               and !exists($levels_hash{$level}));
+           # get_dumps filters on status
+
+           if ($params{'dumpspecs'}) {
+               my $ok = 0;
+               for my $ds (@{$params{'dumpspecs'}}) {
+                   # (the "". are for SWIG's benefit - SWIGged functions don't like
+                   # strings generated by SWIG.  Long story.)
+                   next if (defined $ds->{'host'}
+                           and !match_host("".$ds->{'host'}, $hostname));
+                   next if (defined $ds->{'disk'}
+                           and !match_disk("".$ds->{'disk'}, $diskname));
+                   next if (defined $ds->{'datestamp'}
+                           and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
+                   next if (defined $ds->{'level'}
+                           and !match_level("".$ds->{'level'}, $level));
+
+                   $ok = 1;
+                   last;
+               }
+               next unless $ok;
             }
  
-           # check partnum and nparts
-           next if (defined($params{'partnum'}) and $dumpfile{'partnum'} != $params{'partnum'});
-           next if (defined($params{'nparts'}) and $dumpfile{'nparts'} != $params{'nparts'});
+           my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp, $level);
+           my $dump = $dumps{$dumpkey};
+           if (!defined $dump) {
+               # this will happen when a dump has no parts - a FAILed dump.
+               $dump = $dumps{$dumpkey} = {
+                   dump_timestamp => $dump_timestamp,
+                   write_timestamp => $write_timestamp,
+                   hostname => $hostname,
+                   diskname => $diskname,
+                   level => $level+0,
+                   nparts => $nparts, # hopefully 0?
+               };
+           }
  
-           push @results, \%dumpfile;
+           $dump->{'message'} = $message;
+           if ($status eq 'FAIL') {
+               $dump->{'kb'} = 0;
+               $dump->{'sec'} = 0.0;
+           } else {
+               $dump->{'kb'} = $kb+0;
+               $dump->{'sec'} = $secs+0.0;
+           }
         }
+       Amanda::Logfile::close_logfile($logh);
      }
  
-    return @results;
+    return [ values %dumps], \@parts;
+}
+
+sub get_parts {
+    my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
+    return @$parts;
+}
+
+sub get_dumps {
+    my %params = @_;
+    my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
+    my @dumps = @$dumps;
+
+    if (exists $params{'status'}) {
+       @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
+    }
+
+    return @dumps;
+}
+
+sub sort_parts {
+    my ($keys, @parts) = @_;
+
+    # TODO: make this more efficient by selecting the comparison
+    # functions once, in advance, and just applying them
+    return sort {
+       my $res;
+       for my $key (@$keys) {
+           my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
+
+           if ($k =~ /^(partnum|filenum)$/) {
+               # compare part components numerically
+               $res = $a->{$k} <=> $b->{$k};
+           } elsif ($k =~ /^(nparts|level)$/) {
+               # compare dump components numerically
+               $res = $a->{'dump'}->{$k} <=> $b->{'dump'}->{$k};
+           } elsif ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/) {
+               # compare dump components alphabetically
+               $res = $a->{'dump'}->{$k} cmp $b->{'dump'}->{$k};
+           } else { # (label)
+               # compare part components alphabetically
+               $res = $a->{$k} cmp $b->{$k};
+           }
+           $res = -$res if ($rev eq '-' and $res);
+           return $res if $res;
+       }
+       return 0;
+    } @parts;
  }
  
  sub sort_dumps {
      my ($keys, @dumps) = @_;
  
+    # TODO: make this more efficient by selecting the comparison
+    # functions once, in advance, and just applying them
      return sort {
-       my $r;
+       my $res;
         for my $key (@$keys) {
-           if ($key =~ /^-(.*)$/) {
-               $r = $b->{$1} cmp $a->{$1}; # note: $a and $b are reversed
-           } else {
-               $r = $a->{$key} cmp $b->{$key};
-           }
-           return $r if $r;
+           my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
+
+           if ($k =~ /^(nparts|level)$/) {
+               # compare dump components numerically
+               $res = $a->{$k} <=> $b->{$k};
+           } else { # ($k =~ /^(hostname|diskname|write_timestamp|dump_timestamp)$/)
+               # compare dump components alphabetically
+               $res = $a->{$k} cmp $b->{$k};
+           } 
+           $res = -$res if ($rev eq '-' and $res);
+           return $res if $res;
         }
         return 0;
      } @dumps;
  }
  
-# caches for add_dump() to avoid repeatedly looking up the log
+# caches for add_part() to avoid repeatedly looking up the log
  # filename for a particular write_timestamp.
-my $add_dump_last_label = undef;
-my $add_dump_last_write_timestamp = undef;
-my $add_dump_last_logfile = undef;
+my $add_part_last_label = undef;
+my $add_part_last_write_timestamp = undef;
+my $add_part_last_logfile = undef;
  
-sub add_dump {
+sub add_part {
      my ($dump) = @_;
      my $found;
      my $logfh;
@@ -499,12 +848,12 @@ sub add_dump {
      die "dump has no 'write_timestamp'" unless defined $write_timestamp;
  
      # consult our one-element cache for this label and write_timestamp
-    if (!defined $add_dump_last_label
-       or $add_dump_last_label ne $dump->{'label'}
-       or $add_dump_last_write_timestamp ne $dump->{'write_timestamp'}) {
+    if (!defined $add_part_last_label
+       or $add_part_last_label ne $dump->{'label'}
+       or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
  
         # update the cache
-       $add_dump_last_logfile = undef;
+       $add_part_last_logfile = undef;
         LOGFILE:
         for my $lf (Amanda::Logfile::find_log()) {
             next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
@@ -517,15 +866,15 @@ sub add_dump {
                 next unless (defined $find_result->{'label'});
  
                 if ($find_result->{'label'} eq $dump->{'label'}) {
-                   $add_dump_last_label = $dump->{'label'};
-                   $add_dump_last_write_timestamp = $dump->{'write_timestamp'};
-                   $add_dump_last_logfile = $lf;
+                   $add_part_last_label = $dump->{'label'};
+                   $add_part_last_write_timestamp = $dump->{'write_timestamp'};
+                   $add_part_last_logfile = $lf;
                     last LOGFILE;
                 }
             }
         }
      }
-    $logfile = $add_dump_last_logfile;
+    $logfile = $add_part_last_logfile;
  
      # truncate the write_timestamp if we're not using timestamps
      if (!getconf($CNF_USETIMESTAMPS)) {
@@ -553,7 +902,7 @@ sub add_dump {
  
             # if this is part number 1, reset our secs and kbs counters on the
             # assumption that this is the beginning of a new dump
-           if ($find_result->{'partnum'} =~ qr{1/\d}) {
+           if ($find_result->{'partnum'} == 1) {
                 $last_secs = $last_kbs = 0;
             }
             $last_secs += $find_result->{'sec'};