git.gag.com Git - debian/amanda/blob - perl/Amanda/Holding.pm

   1 # Copyright (c) 2009 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License version 2 as published
   5 # by the Free Software Foundation.
   6 #
   7 # This program is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  10 # for more details.
  11 #
  12 # You should have received a copy of the GNU General Public License along
  13 # with this program; if not, write to the Free Software Foundation, Inc.,
  14 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  15 #
  16 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  18
  19 package Amanda::Holding;
  20
  21 use base qw( Exporter );
  22 use File::Spec;
  23 use File::stat;
  24 use IO::Dir;
  25 use POSIX qw( :fcntl_h );
  26 use Math::BigInt;
  27 use strict;
  28 use warnings;
  29
  30 use Amanda::Config qw( :getconf );
  31 use Amanda::Debug qw( debug );
  32 use Amanda::Header;
  33 use Amanda::Disklist;
  34 use Amanda::Util;
  35
  36 =head1 NAME
  37
  38 Amanda::Holding -- interface to the holding disks
  39
  40 =head1 SYNOPSIS
  41
  42     use Amanda::Holding;
  43
  44 Get some statistics:
  45
  46     my %size_per_host;
  47     for my $hfile (Amanda::Holding::files()) {
  48         my $hdr = Amanda::Holding::get_header($hfile);
  49         next unless $hdr;
  50         $size_per_host{$hdr->{'name'}} += Amanda::Holding::file_size($hfile);
  51     }
  52
  53 Schematic for something like C<amflush>:
  54
  55     for my $ts (sort Amanda::Holding::get_all_datestamps()) {
  56         print $ts, "\n";
  57     }
  58     my @to_dump = <>;
  59     for my $hfile (Amanda::Holding::get_files_for_flush(@to_dump)) {
  60         # flush $hfile
  61     }
  62
  63 =head1 DESCRIPTION
  64
  65 =head2 TERMINOLOGY
  66
  67 =over
  68
  69 =item Holding disk
  70
  71 A holding disk is a directory given in a holdingdisk definition in
  72 C<amanda.conf>.
  73
  74 =item Holding directory
  75
  76 A holding directory is a subdirectory of a holding disk, generally named by
  77 timestamp.  Note, however, that this package does not interpret holding
  78 directory names as timestamps, and does not provide direct access to holding
  79 directories.
  80
  81 =item Holding file
  82
  83 A holding file describes one or more os-level files (holding file chunks) in a
  84 holding directory, together representing a single dump file.
  85
  86 =item Holding chunk
  87
  88 A holding chunk is an individual os-level file representing part of a holding
  89 file.  Chunks are kept small to avoid hitting filesystem size limits, and are
  90 linked together internally by filename.
  91
  92 =back
  93
  94 By way of example:
  95
  96   /data/holding                               <-- holding disk
  97   /data/holding/20070306123456                <-- holding directory
  98   /data/holding/20070306123456/raj._video_a   <-- holding file and chunk
  99   /data/holding/20070306123456/raj._video_a.1 <-- holding chunk
 100
 101 =head2 CONSTANTS
 102
 103 Holding-disk files do not have a block size, so the size of the header is fixed
 104 at 32k.  Rather than hard-code that value, use the constant DISK_BLOCK_BYTES
 105 from this package.
 106
 107 =head2 FUNCTIONS
 108
 109 Note that this package assumes that a config has been loaded (see
 110 L<Amanda::Config>).
 111
 112 These three functions provide basic access to holding disks, files, and chunks:
 113
 114 =over
 115
 116 =item C<disks()>
 117
 118 returns a list of active disks, each represented as a string.  This does not
 119 return holding disks which are defined in C<amanda.conf> but not used.
 120
 121 =item C<files()>
 122
 123 returns a list of active holding files on all disks.  Note that a dump may span
 124 multiple disks, so there is no use in selecting files only on certain holding
 125 disks.
 126
 127 =item C<file_chunks($file)>
 128
 129 returns a list of chunks for the given file.  Chunk filenames are always fully
 130 qualified pathnames.
 131
 132 =back
 133
 134 C<Amanda::Holding> provides a few utility functions on holding files.  Note
 135 that these functions require fully qualified pathnames.
 136
 137 =over
 138
 139 =item C<file_size($file, $ignore_headers)>
 140
 141 returns the size of the holding file I<in kilobytes>, ignoring the size of the
 142 headers if C<$ignore_headers> is true.
 143
 144 =item C<file_unlink($file)>
 145
 146 unlinks (deletes) all chunks comprising C<$file>, returning true on success.
 147
 148 =item C<get_header($file)>
 149
 150 reads and returns the header (see L<Amanda::Header>) for C<$file>.
 151
 152 =back
 153
 154 The remaining two functions are utilities for amflush and related tools:
 155
 156 =over
 157
 158 =item C<get_all_datestamps()>
 159
 160 returns a sorted list of all timestamps with dumps in any active holding disk.
 161
 162 =item C<get_files_for_flush(@timestamps)>
 163
 164 returns a sorted list of files matching any of the supplied timestamps.  Files
 165 for which no DLE exists in the disklist are ignored.  If no timestamps are
 166 provided, then all timestamps are considered.
 167
 168 =back
 169
 170 =cut
 171
 172 use constant DISK_BLOCK_BYTES => 32768;
 173
 174 our @EXPORT_OK = qw(dirs files file_chunks
 175     get_files_for_flush get_all_datestamps
 176     file_size file_unlink get_header);
 177
 178 ##
 179 # utility subs
 180
 181 sub _is_datestr {
 182     my ($str) = @_;
 183
 184     return 0
 185         unless (my ($year, $month, $day, $hour, $min, $sec) =
 186             ($str =~ /(\d{4})(\d{2})(\d{2})(?:(\d{2})(\d{2})(\d{2}))/));
 187
 188     return 0 if ($year < 1990 || $year > 2999);
 189     return 0 if ($month < 1 || $month > 12);
 190     return 0 if ($day < 1 || $day > 31);
 191
 192     return 0 if (defined $hour and $hour > 23);
 193     return 0 if (defined $min and $min > 60);
 194     return 0 if (defined $sec and $sec > 60);
 195
 196     return 1;
 197 }
 198
 199 sub _walk {
 200     my ($file_fn) = @_;
 201
 202     # walk disks, directories, and files with nested loops
 203     for my $disk (disks()) {
 204         my $diskh = IO::Dir->new($disk);
 205         next unless defined $diskh;
 206
 207         while (defined(my $datestr = $diskh->read())) {
 208             next unless (_is_datestr($datestr));
 209
 210             my $dirh = IO::Dir->new(File::Spec->catfile($disk, $datestr));
 211             while (defined(my $dirent = $dirh->read)) {
 212                 next if $dirent eq '.' or $dirent eq '..';
 213
 214                 my $filename = File::Spec->catfile($disk, $datestr, $dirent);
 215                 next unless -f $filename;
 216
 217                 my $hdr = get_header($filename);
 218                 next unless defined($hdr);
 219
 220                 # ignore chunks and anything bogus
 221                 next if ($hdr->{'type'} != $Amanda::Header::F_DUMPFILE);
 222
 223                 $file_fn->($filename, $hdr);
 224             }
 225         }
 226     }
 227 }
 228
 229 ##
 230 # Package functions
 231
 232 sub disks {
 233     my @results;
 234
 235     for my $hdname (@{getconf($CNF_HOLDINGDISK)}) {
 236         my $cfg = lookup_holdingdisk($hdname);
 237         next unless defined $cfg;
 238
 239         my $dir = holdingdisk_getconf($cfg, $HOLDING_DISKDIR);
 240         next unless defined $dir;
 241         next unless -d $dir;
 242         push @results, $dir;
 243     }
 244
 245     return @results;
 246 }
 247
 248 sub files {
 249     my @results;
 250
 251     my $each_file_fn = sub {
 252         my ($filename, $header) = @_;
 253         push @results, $filename;
 254     };
 255     _walk($each_file_fn);
 256
 257     return @results;
 258 }
 259
 260 sub file_chunks {
 261     my ($filename) = @_;
 262     my @results;
 263
 264     while (1) {
 265         last unless -f $filename;
 266         my $hdr = get_header($filename);
 267         last unless defined($hdr);
 268
 269         push @results, $filename;
 270
 271         if ($hdr->{'cont_filename'}) {
 272             $filename = $hdr->{'cont_filename'};
 273         } else {
 274             # no continuation -> we're done
 275             last;
 276         }
 277     }
 278
 279     return @results;
 280 }
 281
 282 sub get_header {
 283     my ($filename) = @_;
 284     return unless -f $filename;
 285
 286     my $fd = POSIX::open($filename, O_RDONLY);
 287     return unless $fd;
 288
 289     my $hdr_bytes = Amanda::Util::full_read($fd, DISK_BLOCK_BYTES);
 290     POSIX::close($fd);
 291     if (length($hdr_bytes) < DISK_BLOCK_BYTES) {
 292         return;
 293     }
 294
 295     return Amanda::Header->from_string($hdr_bytes);
 296 }
 297
 298 sub file_unlink {
 299     my ($filename) = @_;
 300
 301     for my $chunk (file_chunks($filename)) {
 302         unlink($chunk) or return 0;
 303     }
 304
 305     return 1;
 306 }
 307
 308 sub file_size {
 309     my ($filename, $ignore_headers) = @_;
 310     my $total = Math::BigInt->new(0);
 311
 312     for my $chunk (file_chunks($filename)) {
 313         my $sb = stat($chunk);
 314         my $size = Math::BigInt->new($sb->size);
 315         $size -= DISK_BLOCK_BYTES if $ignore_headers;
 316         $size = ($size + 1023) / 1024;
 317
 318         $total += $size;
 319     }
 320
 321     return $total;
 322 }
 323
 324 sub get_files_for_flush {
 325     my (@dateargs) = @_;
 326     my @results;
 327
 328     my $each_file_fn = sub {
 329         my ($filename, $header) = @_;
 330         if (@dateargs && !grep { $_ eq $header->{'datestamp'}; } @dateargs) {
 331             return;
 332         }
 333
 334         if (!Amanda::Disklist::get_disk($header->{'name'}, $header->{'disk'})) {
 335             return;
 336         }
 337
 338         push @results, $filename;
 339     };
 340     _walk($each_file_fn);
 341
 342     return sort @results;
 343 }
 344
 345 sub get_all_datestamps {
 346     my %datestamps;
 347
 348     my $each_file_fn = sub {
 349         my ($filename, $header) = @_;
 350         $datestamps{$header->{'datestamp'}} = 1;
 351     };
 352     _walk($each_file_fn);
 353
 354     return sort keys %datestamps;
 355 }