git.gag.com Git - debian/amanda/blob - perl/Amanda/Holding.pm

   1 # Copyright (c) 2009-2012 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or
   4 # modify it under the terms of the GNU General Public License
   5 # as published by the Free Software Foundation; either version 2
   6 # of the License, or (at your option) any later version.
   7 #
   8 # This program is distributed in the hope that it will be useful, but
   9 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  11 # for more details.
  12 #
  13 # You should have received a copy of the GNU General Public License along
  14 # with this program; if not, write to the Free Software Foundation, Inc.,
  15 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 #
  17 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  18 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  19
  20 package Amanda::Holding;
  21
  22 use base qw( Exporter );
  23 use File::Spec;
  24 use File::stat;
  25 use IO::Dir;
  26 use POSIX qw( :fcntl_h );
  27 use Math::BigInt;
  28 use strict;
  29 use warnings;
  30
  31 use Amanda::Config qw( :getconf );
  32 use Amanda::Debug qw( debug );
  33 use Amanda::Header;
  34 use Amanda::Disklist;
  35 use Amanda::Util;
  36
  37 =head1 NAME
  38
  39 Amanda::Holding -- interface to the holding disks
  40
  41 =head1 SYNOPSIS
  42
  43     use Amanda::Holding;
  44
  45 Get some statistics:
  46
  47     my %size_per_host;
  48     for my $hfile (Amanda::Holding::files()) {
  49         my $hdr = Amanda::Holding::get_header($hfile);
  50         next unless $hdr;
  51         $size_per_host{$hdr->{'name'}} += Amanda::Holding::file_size($hfile);
  52     }
  53
  54 Schematic for something like C<amflush>:
  55
   56     for my $ts (sort Amanda::Holding::get_all_datestamps()) {
  57         print $ts, "\n";
  58     }
  59     my @to_dump = <>;
  60     for my $hfile (Amanda::Holding::get_files_for_flush(@to_dump)) {
  61         # flush $hfile
  62     }
  63
  64 =head1 DESCRIPTION
  65
  66 =head2 TERMINOLOGY
  67
  68 =over
  69
  70 =item Holding disk
  71
  72 A holding disk is a directory given in a holdingdisk definition in
  73 C<amanda.conf>.
  74
  75 =item Holding directory
  76
  77 A holding directory is a subdirectory of a holding disk, generally named by
  78 timestamp.  Note, however, that this package does not interpret holding
  79 directory names as timestamps, and does not provide direct access to holding
  80 directories.
  81
  82 =item Holding file
  83
  84 A holding file describes one or more os-level files (holding file chunks) in a
  85 holding directory, together representing a single dump file.
  86
  87 =item Holding chunk
  88
  89 A holding chunk is an individual os-level file representing part of a holding
   90 file.  Chunks are kept small to avoid hitting filesystem size limits, and are
  91 linked together internally by filename.
  92
  93 =back
  94
  95 By way of example:
  96
  97   /data/holding                               <-- holding disk
  98   /data/holding/20070306123456                <-- holding directory
  99   /data/holding/20070306123456/raj._video_a   <-- holding file and chunk
 100   /data/holding/20070306123456/raj._video_a.1 <-- holding chunk
 101
 102 =head2 CONSTANTS
 103
 104 Holding-disk files do not have a block size, so the size of the header is fixed
 105 at 32k.  Rather than hard-code that value, use the constant DISK_BLOCK_BYTES
 106 from this package.
 107
 108 =head2 FUNCTIONS
 109
 110 Note that this package assumes that a config has been loaded (see
 111 L<Amanda::Config>).
 112
 113 These three functions provide basic access to holding disks, files, and chunks:
 114
 115 =over
 116
 117 =item C<disks()>
 118
  119 returns a list of active holding disks, each represented as a string.  This does not
 120 return holding disks which are defined in C<amanda.conf> but not used.
 121
 122 =item C<files()>
 123
 124 returns a list of active holding files on all disks.  Note that a dump may span
 125 multiple disks, so there is no use in selecting files only on certain holding
 126 disks.
 127
 128 =item C<file_chunks($file)>
 129
 130 returns a list of chunks for the given file.  Chunk filenames are always fully
 131 qualified pathnames.
 132
 133 =back
 134
 135 C<Amanda::Holding> provides a few utility functions on holding files.  Note
 136 that these functions require fully qualified pathnames.
 137
 138 =over
 139
 140 =item C<file_size($file, $ignore_headers)>
 141
 142 returns the size of the holding file I<in kilobytes>, ignoring the size of the
 143 headers if C<$ignore_headers> is true.
 144
 145 =item C<file_unlink($file)>
 146
 147 unlinks (deletes) all chunks comprising C<$file>, returning true on success.
 148
 149 =item C<get_header($file)>
 150
 151 reads and returns the header (see L<Amanda::Header>) for C<$file>.
 152
 153 =back
 154
 155 The remaining two functions are utilities for amflush and related tools:
 156
 157 =over
 158
  159 =item C<get_all_datestamps()>
 160
 161 returns a sorted list of all timestamps with dumps in any active holding disk.
 162
 163 =item C<get_files_for_flush(@timestamps)>
 164
 165 returns a sorted list of files matching any of the supplied timestamps.  Files
 166 for which no DLE exists in the disklist are ignored.  If no timestamps are
 167 provided, then all timestamps are considered.
 168
 169 =back
 170
 171 =cut
 172
 173 use constant DISK_BLOCK_BYTES => 32768;
 174
 175 our @EXPORT_OK = qw(dirs files file_chunks
 176     get_files_for_flush get_all_datestamps
 177     file_size file_unlink get_header);
 178
 179 ##
 180 # utility subs
 181
 182 sub _is_datestr {
 183     my ($str) = @_;
 184
 185     return 0
 186         unless (my ($year, $month, $day, $hour, $min, $sec) =
 187             ($str =~ /(\d{4})(\d{2})(\d{2})(?:(\d{2})(\d{2})(\d{2}))/));
 188
 189     return 0 if ($year < 1990 || $year > 2999);
 190     return 0 if ($month < 1 || $month > 12);
 191     return 0 if ($day < 1 || $day > 31);
 192
 193     return 0 if (defined $hour and $hour > 23);
 194     return 0 if (defined $min and $min > 60);
 195     return 0 if (defined $sec and $sec > 60);
 196
 197     return 1;
 198 }
 199
 200 sub _walk {
 201     my ($file_fn, $verbose) = @_;
 202
 203     # walk disks, directories, and files with nested loops
 204     for my $disk (disks()) {
 205         my $diskh = IO::Dir->new($disk);
 206         if (!defined $diskh) {
 207             print $verbose "could not open holding dir '$disk': $!\n" if $verbose;
 208             next;
 209         }
 210
 211         while (defined(my $datestr = $diskh->read())) {
 212             next if $datestr eq '.' or $datestr eq '..';
 213
 214             my $dirfn = File::Spec->catfile($disk, $datestr);
 215
 216             if (!_is_datestr($datestr)) {
 217                 print $verbose "holding dir '$dirfn' is not a datestamp\n" if $verbose;
 218                 next;
 219             }
 220             if (!-d $dirfn) {
 221                 print $verbose "holding dir '$dirfn' is not a directory\n" if $verbose;
 222                 next;
 223             }
 224
 225             my $dirh = IO::Dir->new($dirfn);
 226             while (defined(my $dirent = $dirh->read)) {
 227                 next if $dirent eq '.' or $dirent eq '..';
 228
 229                 my $filename = File::Spec->catfile($disk, $datestr, $dirent);
 230                 if (!-f $filename) {
 231                     print $verbose "holding file '$filename' is not a file\n" if $verbose;
 232                     next;
 233                 }
 234
 235                 my $hdr = get_header($filename);
 236                 next unless defined($hdr);
 237
 238                 $file_fn->($filename, $hdr);
 239             }
 240         }
 241     }
 242 }
 243
 244 ##
 245 # Package functions
 246
 247 sub disks {
 248     my @results;
 249
 250     for my $hdname (@{getconf($CNF_HOLDINGDISK)}) {
 251         my $cfg = lookup_holdingdisk($hdname);
 252         next unless defined $cfg;
 253
 254         my $dir = holdingdisk_getconf($cfg, $HOLDING_DISKDIR);
 255         next unless defined $dir;
 256         next unless -d $dir;
 257         push @results, $dir;
 258     }
 259
 260     return @results;
 261 }
 262
 263 sub files {
 264     my $verbose = shift;
 265     my @results;
 266
 267     my $each_file_fn = sub {
 268         my ($filename, $header) = @_;
 269         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 270
 271         push @results, $filename;
 272     };
 273     _walk($each_file_fn, $verbose);
 274
 275     return @results;
 276 }
 277
 278 sub all_files {
 279     my $verbose = shift;
 280     my @results;
 281
 282     my $each_file_fn = sub {
 283         my ($filename, $header) = @_;
 284         push @results, { filename => $filename, header => $header };
 285     };
 286     _walk($each_file_fn, $verbose);
 287
 288     return @results;
 289 }
 290
 291 sub merge_all_files {
 292     my @files = @_;
 293     my %hfiles;
 294     my @result;
 295
 296     for my $file (@files) {
 297         $hfiles{$file->{'filename'}} = $file->{'header'};
 298     }
 299
 300     foreach my $filename (keys %hfiles) {
 301         next if !exists $hfiles{$filename};
 302         if ($hfiles{$filename}->{'type'} == $Amanda::Header::F_DUMPFILE) {
 303             push @result, {filename => $filename, header => $hfiles{$filename}};
 304             my $is_tmp = ($filename =~ /\.tmp$/);
 305             my $cont_filename = $filename;
 306             my $cfilename = $hfiles{$cont_filename}->{'cont_filename'};
 307             my $cf = $cfilename;
 308             $cf .= ".tmp" if $is_tmp;
 309             while (defined $cfilename && $cfilename ne "" && -f $cf) {
 310                 delete $hfiles{$cont_filename};
 311                 $cont_filename = $cf;
 312                 $cfilename = $hfiles{$cont_filename}->{'cont_filename'};
 313                 $cf = $cfilename;
 314                 $cf .= ".tmp" if $is_tmp;
 315             }
 316             delete $hfiles{$cont_filename};
 317         } elsif ($hfiles{$filename}->{'type'} != $Amanda::Header::F_CONT_DUMPFILE) {
 318             push @result, {filename => $filename, header => $hfiles{$filename}};
 319             delete $hfiles{$filename}
 320         } else {
 321            # do nothing for F_CONTFILE
 322         }
 323     }
 324
 325     foreach my $filename (keys %hfiles) {
 326         next if !exists $hfiles{$filename};
 327         if ($hfiles{$filename}->{'type'} == $Amanda::Header::F_CONT_DUMPFILE) {
 328             push @result, {filename => $filename, header => $hfiles{$filename}};
 329         } else {
 330             delete $hfiles{$filename}
 331         }
 332     }
 333     return @result;
 334 }
 335
 336 sub file_chunks {
 337     my ($filename) = @_;
 338     my @results;
 339
 340     while (1) {
 341         last unless -f $filename;
 342         my $hdr = get_header($filename);
 343         last unless defined($hdr);
 344
 345         push @results, $filename;
 346
 347         if ($hdr->{'cont_filename'}) {
 348             $filename = $hdr->{'cont_filename'};
 349         } else {
 350             # no continuation -> we're done
 351             last;
 352         }
 353     }
 354
 355     return @results;
 356 }
 357
 358 sub file_tmp_chunks {
 359     my ($filename) = @_;
 360     my @results;
 361
 362     while (1) {
 363         last unless -f $filename;
 364         my $hdr = get_header($filename);
 365         last unless defined($hdr);
 366
 367         push @results, $filename;
 368
 369         if ($hdr->{'cont_filename'}) {
 370             $filename = $hdr->{'cont_filename'} . ".tmp";
 371         } else {
 372             # no continuation -> we're done
 373             last;
 374         }
 375     }
 376
 377     return @results;
 378 }
 379
 380 sub rename_tmp {
 381     my ($filename) = shift;
 382     my ($complete) = shift;
 383
 384     my @files = file_tmp_chunks($filename);
 385     while (my $tmp_filename = pop @files) {
 386         my $hdr = get_header($tmp_filename);
 387         if ($hdr->{'is_partial'} == 0 and $complete == 0) {
 388             $hdr->{'is_partial'} = 1;
 389             write_header($tmp_filename, $hdr);
 390         }
 391         my $hfilename = $tmp_filename;
 392         $hfilename =~ s/\.tmp$//;
 393         rename $tmp_filename, $hfilename;
 394     }
 395
 396     return
 397 }
 398
 399 sub get_header {
 400     my ($filename) = @_;
 401     return unless -f $filename;
 402
 403     my $fd = POSIX::open($filename, O_RDONLY);
 404     return unless $fd;
 405
 406     my $hdr_bytes = Amanda::Util::full_read($fd, DISK_BLOCK_BYTES);
 407     POSIX::close($fd);
 408     if (length($hdr_bytes) == 0) {
 409         my $hdr = Amanda::Header->new();
 410         $hdr->{'type'} = $Amanda::Header::F_EMPTY;
 411         return $hdr;
 412     } elsif (length($hdr_bytes) < DISK_BLOCK_BYTES) {
 413         my $hdr = Amanda::Header->new();
 414         $hdr->{'type'} = $Amanda::Header::F_UNKNOWN;
 415         return $hdr;
 416     }
 417
 418     return Amanda::Header->from_string($hdr_bytes);
 419 }
 420
 421 sub write_header {
 422     my $filename = shift;
 423     my $hdr = shift;
 424
 425     return unless -f $filename;
 426     my $fd = POSIX::open($filename, O_RDWR);
 427     return unless $fd;
 428     my $header = $hdr->to_string(DISK_BLOCK_BYTES, DISK_BLOCK_BYTES);
 429     Amanda::Util::full_write($fd, $header, DISK_BLOCK_BYTES);
 430     POSIX::close($fd);
 431 }
 432
 433 sub file_unlink {
 434     my ($filename) = @_;
 435
 436     for my $chunk (file_chunks($filename)) {
 437         unlink($chunk) or return 0;
 438     }
 439
 440     return 1;
 441 }
 442
 443 sub filetmp_unlink {
 444     my ($filename) = @_;
 445
 446     for my $chunk (filetmp_chunks($filename)) {
 447         unlink($chunk) or return 0;
 448     }
 449
 450     return 1;
 451 }
 452
 453 sub dir_unlink {
 454     # walk disks, directories, and files with nested loops
 455     for my $disk (disks()) {
 456         my $diskh = IO::Dir->new($disk);
 457         next unless defined $diskh;
 458
 459         while (defined(my $datestr = $diskh->read())) {
 460             next if $datestr eq '.' or $datestr eq '..';
 461
 462             my $dirfn = File::Spec->catfile($disk, $datestr);
 463             next unless _is_datestr($datestr);
 464             next unless -d $dirfn;
 465             rmdir $dirfn;
 466         }
 467     }
 468 }
 469
 470 sub file_size {
 471     my ($filename, $ignore_headers) = @_;
 472     my $total = Math::BigInt->new(0);
 473
 474     for my $chunk (file_chunks($filename)) {
 475         my $sb = stat($chunk);
 476         my $size = Math::BigInt->new($sb->size);
 477         $size -= DISK_BLOCK_BYTES if $ignore_headers;
 478         $size = ($size + 1023) / 1024;
 479
 480         $total += $size;
 481     }
 482
 483     return $total;
 484 }
 485
 486 sub get_files_for_flush {
 487     my (@dateargs) = @_;
 488     my @results;
 489
 490     my $each_file_fn = sub {
 491         my ($filename, $header) = @_;
 492         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 493
 494         if (@dateargs && !grep { $_ eq $header->{'datestamp'}; } @dateargs) {
 495             return;
 496         }
 497
 498         if (!Amanda::Disklist::get_disk($header->{'name'}, $header->{'disk'})) {
 499             return;
 500         }
 501
 502         push @results, $filename;
 503     };
 504     _walk($each_file_fn, 0);
 505
 506     return sort @results;
 507 }
 508
 509 sub get_all_datestamps {
 510     my %datestamps;
 511
 512     my $each_file_fn = sub {
 513         my ($filename, $header) = @_;
 514         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 515
 516         $datestamps{$header->{'datestamp'}} = 1;
 517     };
 518     _walk($each_file_fn, 0);
 519
 520     return sort keys %datestamps;
 521 }