git.gag.com Git - debian/amanda/blob - perl/Amanda/Holding.pm

   1 # Copyright (c) 2009 Zmanda, Inc.  All Rights Reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License version 2 as published
   5 # by the Free Software Foundation.
   6 #
   7 # This program is distributed in the hope that it will be useful, but
   8 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   9 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  10 # for more details.
  11 #
  12 # You should have received a copy of the GNU General Public License along
  13 # with this program; if not, write to the Free Software Foundation, Inc.,
  14 # 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  15 #
  16 # Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  17 # Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  18
  19 package Amanda::Holding;
  20
  21 use base qw( Exporter );
  22 use File::Spec;
  23 use File::stat;
  24 use IO::Dir;
  25 use POSIX qw( :fcntl_h );
  26 use Math::BigInt;
  27 use strict;
  28 use warnings;
  29
  30 use Amanda::Config qw( :getconf );
  31 use Amanda::Debug qw( debug );
  32 use Amanda::Header;
  33 use Amanda::Disklist;
  34 use Amanda::Util;
  35
  36 =head1 NAME
  37
  38 Amanda::Holding -- interface to the holding disks
  39
  40 =head1 SYNOPSIS
  41
  42     use Amanda::Holding;
  43
  44 Get some statistics:
  45
  46     my %size_per_host;
  47     for my $hfile (Amanda::Holding::files()) {
  48         my $hdr = Amanda::Holding::get_header($hfile);
  49         next unless $hdr;
  50         $size_per_host{$hdr->{'name'}} += Amanda::Holding::file_size($hfile);
  51     }
  52
  53 Schematic for something like C<amflush>:
  54
    for my $ts (sort Amanda::Holding::get_all_datestamps()) {
  56         print $ts, "\n";
  57     }
  58     my @to_dump = <>;
  59     for my $hfile (Amanda::Holding::get_files_for_flush(@to_dump)) {
  60         # flush $hfile
  61     }
  62
  63 =head1 DESCRIPTION
  64
  65 =head2 TERMINOLOGY
  66
  67 =over
  68
  69 =item Holding disk
  70
  71 A holding disk is a directory given in a holdingdisk definition in
  72 C<amanda.conf>.
  73
  74 =item Holding directory
  75
  76 A holding directory is a subdirectory of a holding disk, generally named by
  77 timestamp.  Note, however, that this package does not interpret holding
  78 directory names as timestamps, and does not provide direct access to holding
  79 directories.
  80
  81 =item Holding file
  82
  83 A holding file describes one or more os-level files (holding file chunks) in a
  84 holding directory, together representing a single dump file.
  85
  86 =item Holding chunk
  87
A holding chunk is an individual os-level file representing part of a holding
file.  Chunks are kept small to avoid hitting filesystem size limits, and are
linked together internally by filename.
  91
  92 =back
  93
  94 By way of example:
  95
  96   /data/holding                               <-- holding disk
  97   /data/holding/20070306123456                <-- holding directory
  98   /data/holding/20070306123456/raj._video_a   <-- holding file and chunk
  99   /data/holding/20070306123456/raj._video_a.1 <-- holding chunk
 100
 101 =head2 CONSTANTS
 102
 103 Holding-disk files do not have a block size, so the size of the header is fixed
 104 at 32k.  Rather than hard-code that value, use the constant DISK_BLOCK_BYTES
 105 from this package.
 106
 107 =head2 FUNCTIONS
 108
 109 Note that this package assumes that a config has been loaded (see
 110 L<Amanda::Config>).
 111
 112 These three functions provide basic access to holding disks, files, and chunks:
 113
 114 =over
 115
 116 =item C<disks()>
 117
returns a list of active disks, each represented as a string.  This does not
return holding disks which are defined in C<amanda.conf> but not used.
 120
 121 =item C<files()>
 122
 123 returns a list of active holding files on all disks.  Note that a dump may span
 124 multiple disks, so there is no use in selecting files only on certain holding
 125 disks.
 126
 127 =item C<file_chunks($file)>
 128
 129 returns a list of chunks for the given file.  Chunk filenames are always fully
 130 qualified pathnames.
 131
 132 =back
 133
 134 C<Amanda::Holding> provides a few utility functions on holding files.  Note
 135 that these functions require fully qualified pathnames.
 136
 137 =over
 138
 139 =item C<file_size($file, $ignore_headers)>
 140
 141 returns the size of the holding file I<in kilobytes>, ignoring the size of the
 142 headers if C<$ignore_headers> is true.
 143
 144 =item C<file_unlink($file)>
 145
 146 unlinks (deletes) all chunks comprising C<$file>, returning true on success.
 147
 148 =item C<get_header($file)>
 149
 150 reads and returns the header (see L<Amanda::Header>) for C<$file>.
 151
 152 =back
 153
 154 The remaining two functions are utilities for amflush and related tools:
 155
 156 =over
 157
=item C<get_all_datestamps()>
 159
 160 returns a sorted list of all timestamps with dumps in any active holding disk.
 161
 162 =item C<get_files_for_flush(@timestamps)>
 163
 164 returns a sorted list of files matching any of the supplied timestamps.  Files
 165 for which no DLE exists in the disklist are ignored.  If no timestamps are
 166 provided, then all timestamps are considered.
 167
 168 =back
 169
 170 =cut
 171
 172 use constant DISK_BLOCK_BYTES => 32768;
 173
 174 our @EXPORT_OK = qw(dirs files file_chunks
 175     get_files_for_flush get_all_datestamps
 176     file_size file_unlink get_header);
 177
 178 ##
 179 # utility subs
 180
 181 sub _is_datestr {
 182     my ($str) = @_;
 183
 184     return 0
 185         unless (my ($year, $month, $day, $hour, $min, $sec) =
 186             ($str =~ /(\d{4})(\d{2})(\d{2})(?:(\d{2})(\d{2})(\d{2}))/));
 187
 188     return 0 if ($year < 1990 || $year > 2999);
 189     return 0 if ($month < 1 || $month > 12);
 190     return 0 if ($day < 1 || $day > 31);
 191
 192     return 0 if (defined $hour and $hour > 23);
 193     return 0 if (defined $min and $min > 60);
 194     return 0 if (defined $sec and $sec > 60);
 195
 196     return 1;
 197 }
 198
 199 sub _walk {
 200     my ($file_fn, $verbose) = @_;
 201
 202     # walk disks, directories, and files with nested loops
 203     for my $disk (disks()) {
 204         my $diskh = IO::Dir->new($disk);
 205         if (!defined $diskh) {
 206             print $verbose "could not open holding dir '$disk': $!\n" if $verbose;
 207             next;
 208         }
 209
 210         while (defined(my $datestr = $diskh->read())) {
 211             next if $datestr eq '.' or $datestr eq '..';
 212
 213             my $dirfn = File::Spec->catfile($disk, $datestr);
 214
 215             if (!_is_datestr($datestr)) {
 216                 print $verbose "holding dir '$dirfn' is not a datestamp\n" if $verbose;
 217                 next;
 218             }
 219             if (!-d $dirfn) {
 220                 print $verbose "holding dir '$dirfn' is not a directory\n" if $verbose;
 221                 next;
 222             }
 223
 224             my $dirh = IO::Dir->new($dirfn);
 225             while (defined(my $dirent = $dirh->read)) {
 226                 next if $dirent eq '.' or $dirent eq '..';
 227
 228                 my $filename = File::Spec->catfile($disk, $datestr, $dirent);
 229                 if (!-f $filename) {
 230                     print $verbose "holding file '$filename' is not a file\n" if $verbose;
 231                     next;
 232                 }
 233
 234                 my $hdr = get_header($filename);
 235                 next unless defined($hdr);
 236
 237                 $file_fn->($filename, $hdr);
 238             }
 239         }
 240     }
 241 }
 242
 243 ##
 244 # Package functions
 245
 246 sub disks {
 247     my @results;
 248
 249     for my $hdname (@{getconf($CNF_HOLDINGDISK)}) {
 250         my $cfg = lookup_holdingdisk($hdname);
 251         next unless defined $cfg;
 252
 253         my $dir = holdingdisk_getconf($cfg, $HOLDING_DISKDIR);
 254         next unless defined $dir;
 255         next unless -d $dir;
 256         push @results, $dir;
 257     }
 258
 259     return @results;
 260 }
 261
 262 sub files {
 263     my $verbose = shift;
 264     my @results;
 265
 266     my $each_file_fn = sub {
 267         my ($filename, $header) = @_;
 268         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 269
 270         push @results, $filename;
 271     };
 272     _walk($each_file_fn, $verbose);
 273
 274     return @results;
 275 }
 276
 277 sub all_files {
 278     my $verbose = shift;
 279     my @results;
 280
 281     my $each_file_fn = sub {
 282         my ($filename, $header) = @_;
 283         push @results, { filename => $filename, header => $header };
 284     };
 285     _walk($each_file_fn, $verbose);
 286
 287     return @results;
 288 }
 289
 290 sub merge_all_files {
 291     my @files = @_;
 292     my %hfiles;
 293     my @result;
 294
 295     for my $file (@files) {
 296         $hfiles{$file->{'filename'}} = $file->{'header'};
 297     }
 298
 299     foreach my $filename (keys %hfiles) {
 300         next if !exists $hfiles{$filename};
 301         if ($hfiles{$filename}->{'type'} == $Amanda::Header::F_DUMPFILE) {
 302             push @result, {filename => $filename, header => $hfiles{$filename}};
 303             my $is_tmp = ($filename =~ /\.tmp$/);
 304             my $cont_filename = $filename;
 305             my $cfilename = $hfiles{$cont_filename}->{'cont_filename'};
 306             my $cf = $cfilename;
 307             $cf .= ".tmp" if $is_tmp;
 308             while (defined $cfilename && $cfilename ne "" && -f $cf) {
 309                 delete $hfiles{$cont_filename};
 310                 $cont_filename = $cf;
 311                 $cfilename = $hfiles{$cont_filename}->{'cont_filename'};
 312                 $cf = $cfilename;
 313                 $cf .= ".tmp" if $is_tmp;
 314             }
 315             delete $hfiles{$cont_filename};
 316         } elsif ($hfiles{$filename}->{'type'} != $Amanda::Header::F_CONT_DUMPFILE) {
 317             push @result, {filename => $filename, header => $hfiles{$filename}};
 318             delete $hfiles{$filename}
 319         } else {
 320            # do nothing for F_CONTFILE
 321         }
 322     }
 323
 324     foreach my $filename (keys %hfiles) {
 325         next if !exists $hfiles{$filename};
 326         if ($hfiles{$filename}->{'type'} == $Amanda::Header::F_CONT_DUMPFILE) {
 327             push @result, {filename => $filename, header => $hfiles{$filename}};
 328         } else {
 329             delete $hfiles{$filename}
 330         }
 331     }
 332     return @result;
 333 }
 334
 335 sub file_chunks {
 336     my ($filename) = @_;
 337     my @results;
 338
 339     while (1) {
 340         last unless -f $filename;
 341         my $hdr = get_header($filename);
 342         last unless defined($hdr);
 343
 344         push @results, $filename;
 345
 346         if ($hdr->{'cont_filename'}) {
 347             $filename = $hdr->{'cont_filename'};
 348         } else {
 349             # no continuation -> we're done
 350             last;
 351         }
 352     }
 353
 354     return @results;
 355 }
 356
 357 sub file_tmp_chunks {
 358     my ($filename) = @_;
 359     my @results;
 360
 361     while (1) {
 362         last unless -f $filename;
 363         my $hdr = get_header($filename);
 364         last unless defined($hdr);
 365
 366         push @results, $filename;
 367
 368         if ($hdr->{'cont_filename'}) {
 369             $filename = $hdr->{'cont_filename'} . ".tmp";
 370         } else {
 371             # no continuation -> we're done
 372             last;
 373         }
 374     }
 375
 376     return @results;
 377 }
 378
 379 sub rename_tmp {
 380     my ($filename) = shift;
 381     my ($complete) = shift;
 382
 383     my @files = file_tmp_chunks($filename);
 384     while (my $tmp_filename = pop @files) {
 385         my $hdr = get_header($tmp_filename);
 386         if ($hdr->{'is_partial'} == 0 and $complete == 0) {
 387             $hdr->{'is_partial'} = 1;
 388             write_header($tmp_filename, $hdr);
 389         }
 390         my $hfilename = $tmp_filename;
 391         $hfilename =~ s/\.tmp$//;
 392         rename $tmp_filename, $hfilename;
 393     }
 394
 395     return
 396 }
 397
 398 sub get_header {
 399     my ($filename) = @_;
 400     return unless -f $filename;
 401
 402     my $fd = POSIX::open($filename, O_RDONLY);
 403     return unless $fd;
 404
 405     my $hdr_bytes = Amanda::Util::full_read($fd, DISK_BLOCK_BYTES);
 406     POSIX::close($fd);
 407     if (length($hdr_bytes) == 0) {
 408         my $hdr = Amanda::Header->new();
 409         $hdr->{'type'} = $Amanda::Header::F_EMPTY;
 410         return $hdr;
 411     } elsif (length($hdr_bytes) < DISK_BLOCK_BYTES) {
 412         my $hdr = Amanda::Header->new();
 413         $hdr->{'type'} = $Amanda::Header::F_UNKNOWN;
 414         return $hdr;
 415     }
 416
 417     return Amanda::Header->from_string($hdr_bytes);
 418 }
 419
 420 sub write_header {
 421     my $filename = shift;
 422     my $hdr = shift;
 423
 424     return unless -f $filename;
 425     my $fd = POSIX::open($filename, O_RDWR);
 426     return unless $fd;
 427     my $header = $hdr->to_string(DISK_BLOCK_BYTES, DISK_BLOCK_BYTES);
 428     Amanda::Util::full_write($fd, $header, DISK_BLOCK_BYTES);
 429     POSIX::close($fd);
 430 }
 431
 432 sub file_unlink {
 433     my ($filename) = @_;
 434
 435     for my $chunk (file_chunks($filename)) {
 436         unlink($chunk) or return 0;
 437     }
 438
 439     return 1;
 440 }
 441
 442 sub filetmp_unlink {
 443     my ($filename) = @_;
 444
 445     for my $chunk (filetmp_chunks($filename)) {
 446         unlink($chunk) or return 0;
 447     }
 448
 449     return 1;
 450 }
 451
 452 sub dir_unlink {
 453     # walk disks, directories, and files with nested loops
 454     for my $disk (disks()) {
 455         my $diskh = IO::Dir->new($disk);
 456         next unless defined $diskh;
 457
 458         while (defined(my $datestr = $diskh->read())) {
 459             next if $datestr eq '.' or $datestr eq '..';
 460
 461             my $dirfn = File::Spec->catfile($disk, $datestr);
 462             next unless _is_datestr($datestr);
 463             next unless -d $dirfn;
 464             rmdir $dirfn;
 465         }
 466     }
 467 }
 468
 469 sub file_size {
 470     my ($filename, $ignore_headers) = @_;
 471     my $total = Math::BigInt->new(0);
 472
 473     for my $chunk (file_chunks($filename)) {
 474         my $sb = stat($chunk);
 475         my $size = Math::BigInt->new($sb->size);
 476         $size -= DISK_BLOCK_BYTES if $ignore_headers;
 477         $size = ($size + 1023) / 1024;
 478
 479         $total += $size;
 480     }
 481
 482     return $total;
 483 }
 484
 485 sub get_files_for_flush {
 486     my (@dateargs) = @_;
 487     my @results;
 488
 489     my $each_file_fn = sub {
 490         my ($filename, $header) = @_;
 491         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 492
 493         if (@dateargs && !grep { $_ eq $header->{'datestamp'}; } @dateargs) {
 494             return;
 495         }
 496
 497         if (!Amanda::Disklist::get_disk($header->{'name'}, $header->{'disk'})) {
 498             return;
 499         }
 500
 501         push @results, $filename;
 502     };
 503     _walk($each_file_fn, 0);
 504
 505     return sort @results;
 506 }
 507
 508 sub get_all_datestamps {
 509     my %datestamps;
 510
 511     my $each_file_fn = sub {
 512         my ($filename, $header) = @_;
 513         return if $header->{'type'} != $Amanda::Header::F_DUMPFILE;
 514
 515         $datestamps{$header->{'datestamp'}} = 1;
 516     };
 517     _walk($each_file_fn, 0);
 518
 519     return sort keys %datestamps;
 520 }