1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Forward declaration: the operation table declared next takes pointers
   to this structure before the structure itself is defined.  */
23 struct tar_sparse_file;
/* Phase indicator handed to the scan_block hook.  The values used in
   this file are scan_begin, scan_block and scan_end.  */
25 enum sparse_scan_state
/* Table of format-specific operations on a sparse file.  Any member may
   be a null pointer: each tar_sparse_* wrapper below tests its hook
   before calling it.  The static tables defined later in this file
   (oldgnu_optab, star_optab, pax_optab) initialize the members
   positionally, so the member order here is significant.  */
32 struct tar_sparse_optab
34 bool (*init) (struct tar_sparse_file *);
35 bool (*done) (struct tar_sparse_file *);
36 bool (*sparse_member_p) (struct tar_sparse_file *);
37 bool (*dump_header) (struct tar_sparse_file *);
38 bool (*fixup_header) (struct tar_sparse_file *);
39 bool (*decode_header) (struct tar_sparse_file *);
40 bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
42 bool (*dump_region) (struct tar_sparse_file *, size_t);
43 bool (*extract_region) (struct tar_sparse_file *, size_t);
/* State of one sparse-file operation.  It ties together the open
   descriptor, the running count of bytes transferred (reset by
   tar_sparse_init), the tar_stat_info holding the sparse map and sizes,
   and the operation table picked by sparse_select_optab.  */
46 struct tar_sparse_file
48 int fd; /* File descriptor */
49 size_t dumped_size; /* Number of bytes actually written
51 struct tar_stat_info *stat_info; /* Information about the file */
52 struct tar_sparse_optab *optab;
53 void *closure; /* Any additional data optab calls might
/* Ask the selected format whether FILE is a sparse archive member.  The
   sparse_member_p hook is optional; when it is set, its result is
   returned.  */
58 tar_sparse_member_p (struct tar_sparse_file *file)
60 if (file->optab->sparse_member_p)
61 return file->optab->sparse_member_p (file);
/* Start an operation on FILE: reset the transferred-byte counter, then
   run the format's optional init hook and return its result.  */
66 tar_sparse_init (struct tar_sparse_file *file)
68 file->dumped_size = 0;
69 if (file->optab->init)
70 return file->optab->init (file);
/* Finish an operation on FILE by running the format's optional done
   hook; when the hook is set, its result is returned.  */
75 tar_sparse_done (struct tar_sparse_file *file)
77 if (file->optab->done)
78 return file->optab->done (file);
/* Forward one scanning step to the format's optional scan_block hook,
   passing FILE, STATE and BLOCK through unchanged, and return the
   hook's result.  sparse_scan_file passes a null BLOCK for every step
   except a scan_block step that carries data.  */
83 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
86 if (file->optab->scan_block)
87 return file->optab->scan_block (file, state, block);
/* Hand sparse-map entry I of FILE to the format's dump_region hook, when
   one is set, and return the hook's result.  */
92 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
94 if (file->optab->dump_region)
95 return file->optab->dump_region (file, i);
/* Hand sparse-map entry I of FILE to the format's extract_region hook,
   when one is set, and return the hook's result.  */
100 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
102 if (file->optab->extract_region)
103 return file->optab->extract_region (file, i);
/* Run the format's optional dump_header hook for FILE and return its
   result.  */
108 tar_sparse_dump_header (struct tar_sparse_file *file)
110 if (file->optab->dump_header)
111 return file->optab->dump_header (file);
/* Run the format's optional decode_header hook for FILE and return its
   result.  */
116 tar_sparse_decode_header (struct tar_sparse_file *file)
118 if (file->optab->decode_header)
119 return file->optab->decode_header (file);
/* Run the format's optional fixup_header hook for FILE and return its
   result.  */
124 tar_sparse_fixup_header (struct tar_sparse_file *file)
126 if (file->optab->fixup_header)
127 return file->optab->fixup_header (file);
/* Reposition FILE's descriptor with lseek (OFFSET, WHENCE).  A negative
   lseek result is diagnosed through seek_diag_details, which is given
   the file's original name and the requested offset.  */
133 lseek_or_error (struct tar_sparse_file *file, off_t offset, int whence)
135 if (lseek (file->fd, offset, whence) < 0)
137 seek_diag_details (file->stat_info->orig_file_name, offset);
143 /* Takes a blockful of data and basically cruises through it to see if
144 it's made *entirely* of zeros, returning a 0 the instant it finds
145 something that is a nonzero, i.e., useful data. */
/* BUFFER is the data to inspect and SIZE the number of bytes in it.
   Callers in this file treat a true result as "this piece is a hole".  */
147 zero_block_p (char *buffer, size_t size)
/* Zero the BLOCKSIZE bytes starting at P.  The expansion carries no
   trailing semicolon of its own, so `clear_block (p);' is exactly one
   statement and stays valid as the body of an unbraced if/else.  */
#define clear_block(p) memset (p, 0, BLOCKSIZE)
/* Number of map entries allocated by sparse_add_map when it creates the
   sparse map for the first time.  */
157 #define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
/* Append a copy of *SP to FILE's sparse map.  The array is created with
   SPARSES_INIT_COUNT slots when it does not exist yet, and its capacity
   is doubled with xrealloc once every slot is in use.

   NOTE(review): the creation branch sets sparse_map and sparse_map_size
   only, so sparse_map_avail is presumably expected to be zero already.
   Doubling also does nothing useful if sparse_map_size is zero while
   sparse_map is non-null.  Confirm callers cannot reach either state.  */
160 sparse_add_map (struct tar_sparse_file *file, struct sp_array *sp)
162 if (file->stat_info->sparse_map == NULL)
164 file->stat_info->sparse_map =
165 xmalloc (SPARSES_INIT_COUNT * sizeof file->stat_info->sparse_map[0]);
166 file->stat_info->sparse_map_size = SPARSES_INIT_COUNT;
168 else if (file->stat_info->sparse_map_avail == file->stat_info->sparse_map_size)
170 file->stat_info->sparse_map_size *= 2;
171 file->stat_info->sparse_map =
172 xrealloc (file->stat_info->sparse_map,
173 file->stat_info->sparse_map_size
174 * sizeof file->stat_info->sparse_map[0]);
176 file->stat_info->sparse_map[file->stat_info->sparse_map_avail++] = *sp;
/* Read FILE from offset 0 in pieces of at most BLOCKSIZE bytes.  Runs of
   non-zero pieces are accumulated in SP and appended to the sparse map
   with sparse_add_map, and the size of every non-zero piece is added to
   stat_info->archive_file_size.  The format's scan hook is notified
   through tar_sparse_scan with scan_begin before the loop, scan_block
   inside it, and scan_end (whose result is returned) at the end.

   NOTE(review): the loop ends when safe_read returns 0 or
   SAFE_READ_ERROR, and COUNT is then added to archive_file_size.  That
   is harmless for 0 but looks wrong for SAFE_READ_ERROR; confirm the
   read-error case is handled.  */
179 /* Scan the sparse file and create its map */
181 sparse_scan_file (struct tar_sparse_file *file)
183 static char buffer[BLOCKSIZE];
186 struct sp_array sp = {0, 0};
188 if (!lseek_or_error (file, 0, SEEK_SET))
190 clear_block (buffer);
192 file->stat_info->sparse_map_size = 0;
193 file->stat_info->archive_file_size = 0;
195 if (!tar_sparse_scan (file, scan_begin, NULL))
198 while ((count = safe_read (file->fd, buffer, sizeof buffer)) != 0
199 && count != SAFE_READ_ERROR)
201 /* Analyze the block */
202 if (zero_block_p (buffer, count))
206 sparse_add_map (file, &sp);
208 if (!tar_sparse_scan (file, scan_block, NULL))
214 if (sp.numbytes == 0)
216 sp.numbytes += count;
217 file->stat_info->archive_file_size += count;
218 if (!tar_sparse_scan (file, scan_block, buffer))
223 clear_block (buffer);
226 if (sp.numbytes == 0)
229 sparse_add_map (file, &sp);
230 file->stat_info->archive_file_size += count;
231 return tar_sparse_scan (file, scan_end, NULL);
/* Forward declarations of the per-format operation tables.
   sparse_select_optab takes their addresses before the tables are
   defined further down in this file.  */
234 static struct tar_sparse_optab oldgnu_optab;
235 static struct tar_sparse_optab star_optab;
236 static struct tar_sparse_optab pax_optab;
/* Set FILE->optab according to the archive format in effect.  That is
   current_format, or archive_format when current_format is
   DEFAULT_FORMAT.  The assignments shown select oldgnu_optab (GNU_FORMAT
   is one of its case labels), pax_optab or star_optab.  Callers treat a
   false result as "no sparse support for this format".  */
239 sparse_select_optab (struct tar_sparse_file *file)
241 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
248 case GNU_FORMAT: /*FIXME: This one should disappear? */
249 file->optab = &oldgnu_optab;
253 file->optab = &pax_optab;
257 file->optab = &star_optab;
/* Copy sparse-map entry I of FILE into the archive.  After seeking to
   the entry's offset, the data is read in pieces of at most BLOCKSIZE
   bytes straight into archive blocks obtained from find_next_block.
   Each block is zero-filled before the read, so whatever a short piece
   leaves unwritten stays zero.  A read error is diagnosed with
   read_diag_details.  dumped_size grows by the number of bytes read,
   and each filled block is released with set_next_block_after.  */
267 sparse_dump_region (struct tar_sparse_file *file, size_t i)
270 off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
272 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset,
276 while (bytes_left > 0)
278 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
281 blk = find_next_block ();
282 memset (blk->buffer, 0, BLOCKSIZE);
283 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
284 if (bytes_read == SAFE_READ_ERROR)
286 read_diag_details (file->stat_info->orig_file_name,
287 file->stat_info->sparse_map[i].offset
288 + file->stat_info->sparse_map[i].numbytes
294 bytes_left -= bytes_read;
295 file->dumped_size += bytes_read;
296 set_next_block_after (blk);
/* Write sparse-map entry I from the archive to FILE.  After seeking to
   the entry's offset, one branch (commented below as the trailing-hole
   case) calls sys_truncate on the descriptor and issues truncate_warn
   when that returns nonzero.  Otherwise numbytes bytes are copied from
   successive archive blocks with full_write, at most BLOCKSIZE per
   block.  dumped_size grows by the count full_write returns, and a
   count different from the requested size is diagnosed with
   write_error_details.  */
303 sparse_extract_region (struct tar_sparse_file *file, size_t i)
307 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset,
311 write_size = file->stat_info->sparse_map[i].numbytes;
315 /* Last block of the file is a hole */
316 if (sys_truncate (file->fd))
317 truncate_warn (file->stat_info->orig_file_name);
319 else while (write_size > 0)
322 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
323 union block *blk = find_next_block ();
326 ERROR ((0, 0, _("Unexpected EOF in archive")));
329 set_next_block_after (blk);
330 count = full_write (file->fd, blk->buffer, wrbytes);
332 file->dumped_size += count;
333 if (count != wrbytes)
335 write_error_details (file->stat_info->orig_file_name,
345 /* Interface functions */
/* Archive a sparse file.  Returns dump_status_not_implemented when no
   operation table can be selected or the format's init hook fails.
   Otherwise the file is scanned with sparse_scan_file.  If that
   succeeds and the format provides a dump_region hook, the header is
   written and every map entry is dumped in order, stopping at the first
   failure.  pad_archive is called with the archived size minus the
   bytes actually dumped.  The result is dump_status_ok only when both
   the done hook and the preceding steps succeeded, and
   dump_status_short otherwise.  */
347 sparse_dump_file (int fd, struct tar_stat_info *st)
350 struct tar_sparse_file file;
355 if (!sparse_select_optab (&file)
356 || !tar_sparse_init (&file))
357 return dump_status_not_implemented;
359 rc = sparse_scan_file (&file);
360 if (rc && file.optab->dump_region)
362 tar_sparse_dump_header (&file);
368 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
369 rc = tar_sparse_dump_region (&file, i);
373 pad_archive(file.stat_info->archive_file_size - file.dumped_size);
374 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
/* The test compares ST_NBLOCKS (st->stat) with st_size expressed in
   ST_NBLOCKSIZE units, rounded up.  The file counts as sparse when the
   former is strictly smaller.  */
377 /* Returns true if the file represented by stat is a sparse one */
379 sparse_file_p (struct tar_stat_info *st)
381 return (ST_NBLOCKS (st->stat)
382 < (st->stat.st_size / ST_NBLOCKSIZE
383 + (st->stat.st_size % ST_NBLOCKSIZE != 0)));
/* Interface wrapper: build a temporary tar_sparse_file, select the
   operation table for the current format, and return what the format's
   sparse_member_p hook reports (via tar_sparse_member_p).  */
387 sparse_member_p (struct tar_stat_info *st)
389 struct tar_sparse_file file;
391 if (!sparse_select_optab (&file))
394 return tar_sparse_member_p (&file);
/* Interface wrapper: build a temporary tar_sparse_file, select the
   operation table for the current format, and return the result of the
   format's fixup_header hook (via tar_sparse_fixup_header).  */
398 sparse_fixup_header (struct tar_stat_info *st)
400 struct tar_sparse_file file;
402 if (!sparse_select_optab (&file))
405 return tar_sparse_fixup_header (&file);
/* Extract a sparse member.  Returns dump_status_not_implemented when no
   operation table can be selected or the init hook fails.  Otherwise the
   header is decoded and every map entry is extracted in order, stopping
   at the first failure.  *SIZE receives the archived size minus the
   bytes already transferred.  The result is dump_status_ok only when
   both the done hook and the preceding steps succeeded, and
   dump_status_short otherwise.  */
409 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
412 struct tar_sparse_file file;
418 if (!sparse_select_optab (&file)
419 || !tar_sparse_init (&file))
420 return dump_status_not_implemented;
422 rc = tar_sparse_decode_header (&file);
423 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
424 rc = tar_sparse_extract_region (&file, i);
425 *size = file.stat_info->archive_file_size - file.dumped_size;
426 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
/* Skip over a sparse member without extracting it.  The header is
   decoded, then skip_file is called with the archived size.  The return
   convention matches the other interface functions:
   dump_status_not_implemented when setup fails, dump_status_ok when
   decoding and the done hook both succeed, dump_status_short
   otherwise.  */
430 sparse_skip_file (struct tar_stat_info *st)
433 struct tar_sparse_file file;
438 if (!sparse_select_optab (&file)
439 || !tar_sparse_init (&file))
440 return dump_status_not_implemented;
442 rc = tar_sparse_decode_header (&file);
443 skip_file (file.stat_info->archive_file_size);
444 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
/* Scratch block used by check_sparse_region and check_data_region to
   hold data read back from the file being compared.  */
448 static char diff_buffer[BLOCKSIZE];
/* Check that the part of FILE starting at BEG and extending up to END
   is a hole, i.e. reads back as zeros.  After seeking to BEG, data is
   read into diff_buffer (which is cleared first) in pieces bounded by
   END - BEG and, apparently, by BLOCKSIZE.  A read error is diagnosed
   with read_diag_details, and a piece for which zero_block_p fails is
   reported with report_difference.  */
451 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
453 if (!lseek_or_error (file, beg, SEEK_SET))
459 size_t rdsize = end - beg;
461 if (rdsize > BLOCKSIZE)
463 clear_block (diff_buffer);
464 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
465 if (bytes_read == SAFE_READ_ERROR)
467 read_diag_details (file->stat_info->orig_file_name,
472 if (!zero_block_p (diff_buffer, bytes_read))
/* BEG is an off_t, which need not have the width or signedness that %lu
   expects, so convert it explicitly to the type the format names.  */
474 report_difference (file->stat_info,
475 _("File fragment at %lu is not a hole"), (unsigned long) beg);
/* Compare sparse-map entry I of FILE with the archive.  After seeking
   to the entry's offset, each step takes the next archive block, reads
   up to BLOCKSIZE bytes of the file into diff_buffer, adds the bytes
   read to dumped_size, and compares the two buffers with memcmp.  A
   mismatch is reported as "Contents differ", and a read error is
   diagnosed with read_diag_details.

   NOTE(review): memcmp compares RDSIZE bytes even when safe_read
   returned fewer, and diff_buffer is not cleared in this function.
   Confirm that a short read cannot compare stale bytes.  */
485 check_data_region (struct tar_sparse_file *file, size_t i)
489 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset,
492 size_left = file->stat_info->sparse_map[i].numbytes;
493 while (size_left > 0)
496 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
498 union block *blk = find_next_block ();
501 ERROR ((0, 0, _("Unexpected EOF in archive")));
504 set_next_block_after (blk);
505 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
506 if (bytes_read == SAFE_READ_ERROR)
508 read_diag_details (file->stat_info->orig_file_name,
509 file->stat_info->sparse_map[i].offset
510 + file->stat_info->sparse_map[i].numbytes
515 file->dumped_size += bytes_read;
516 size_left -= bytes_read;
517 if (memcmp (blk->buffer, diff_buffer, rdsize))
519 report_difference (file->stat_info, _("Contents differ"));
/* Compare a sparse archive member with the file on disk.  Returns
   dump_status_not_implemented when no operation table can be selected
   or the init hook fails.  Otherwise the header is decoded and, for
   each map entry in order, two checks run: the gap between the end of
   the previous entry (OFFSET) and the start of this one must be a hole
   (check_sparse_region), and the entry's data must match the archive
   (check_data_region).  The loop stops at the first failure.  skip_file
   is called with the archived size minus the bytes already consumed,
   and the done hook runs last.  */
527 sparse_diff_file (int fd, struct tar_stat_info *st)
530 struct tar_sparse_file file;
537 if (!sparse_select_optab (&file)
538 || !tar_sparse_init (&file))
539 return dump_status_not_implemented;
541 rc = tar_sparse_decode_header (&file);
542 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
544 rc = check_sparse_region (&file,
545 offset, file.stat_info->sparse_map[i].offset)
546 && check_data_region (&file, i);
547 offset = file.stat_info->sparse_map[i].offset
548 + file.stat_info->sparse_map[i].numbytes;
552 skip_file (file.stat_info->archive_file_size - file.dumped_size);
554 tar_sparse_done (&file);
559 /* Old GNU Format. The sparse file information is stored in the
560 oldgnu_header in the following manner:
562 The header is marked with type 'S'. Its `size' field contains
563 the cumulative size of all non-empty blocks of the file. The
564 actual file size is stored in `realsize' member of oldgnu_header.
566 The map of the file is stored in a list of `struct sparse'.
567 Each struct contains offset to the block of data and its
568 size (both as octal numbers). The first file header contains
569 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
570 contains more structs, then the field `isextended' of the main
571 header is set to 1 (binary) and the `struct sparse_header'
572 header follows, containing at most 21 following structs
573 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
574 field of the extended header is set and the next extension header
/* Result type of oldgnu_add_sparse.  add_ok is the value that lets the
   header-decoding loops below keep reading entries.  */
577 enum oldgnu_add_status
/* sparse_member_p hook for the old GNU format: the member is sparse
   when the current header's typeflag is GNUTYPE_SPARSE.  FILE is not
   consulted.  */
585 oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
587 return current_header->header.typeflag == GNUTYPE_SPARSE;
/* Decode one header map entry S and append it to FILE's sparse map.  An
   entry whose numbytes field begins with a NUL byte is handled before
   any decoding.  Otherwise offset and size are decoded with
   OFF_FROM_HEADER and SIZE_FROM_HEADER and sanity-checked, in
   particular that the entry ends within st_size and that
   archive_file_size is not negative.  Only then is sparse_add_map
   called.  */
590 /* Add a sparse item to the sparse file and its obstack */
591 static enum oldgnu_add_status
592 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
596 if (s->numbytes[0] == '\0')
598 sp.offset = OFF_FROM_HEADER (s->offset);
599 sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
601 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
602 || file->stat_info->archive_file_size < 0)
605 sparse_add_map (file, &sp);
/* fixup_header hook for the old GNU format.  The size taken from the
   header becomes archive_file_size, and st_size is reloaded from the
   realsize field of the old GNU header.  */
610 oldgnu_fixup_header (struct tar_sparse_file *file)
612 /* NOTE! st_size was initialized from the header
613 which actually contains archived size. The following fixes it */
614 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
615 file->stat_info->stat.st_size =
616 OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
/* decode_header hook for the old GNU format.  After resetting
   sparse_map_size to 0, the SPARSES_IN_OLDGNU_HEADER entries of the
   current header are passed to oldgnu_add_sparse.  While the isextended
   flag is set and the status stays add_ok, the next archive block is
   fetched as an extension header and up to SPARSES_IN_SPARSE_HEADER
   further entries are added from it.  Running out of blocks is reported
   as "Unexpected EOF in archive", and a bad map as "invalid sparse
   archive member".

   NOTE(review): RC is declared static although nothing shown here needs
   it to persist between calls.  Confirm before relying on it.  */
620 /* Convert old GNU format sparse data to internal representation */
622 oldgnu_get_sparse_info (struct tar_sparse_file *file)
625 union block *h = current_header;
627 static enum oldgnu_add_status rc;
629 file->stat_info->sparse_map_size = 0;
630 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
632 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
637 for (ext_p = h->oldgnu_header.isextended;
638 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
640 h = find_next_block ();
643 ERROR ((0, 0, _("Unexpected EOF in archive")));
646 set_next_block_after (h);
647 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
648 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
653 ERROR ((0, 0, _("%s: invalid sparse archive member"),
654 file->stat_info->orig_file_name));
/* Encode sparse-map entries of FILE, starting at index *PINDEX, into
   header slots starting at SP, using OFF_TO_CHARS for the offset and
   SIZE_TO_CHARS for the length.  At most SPARSE_SIZE slots are filled,
   and the loop also stops when the map is exhausted.  On return *PINDEX
   is the index of the first entry not stored.  */
661 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
662 struct sparse *sp, size_t sparse_size)
664 for (; *pindex < file->stat_info->sparse_map_avail
665 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
667 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
669 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
/* dump_header hook for the old GNU format.  A header is started for the
   file and marked GNUTYPE_SPARSE.  The real file size goes into the
   realsize field and the archived (shrunken) size into the size field.
   The first SPARSES_IN_OLDGNU_HEADER map entries are stored in the
   header itself, and isextended records whether entries remain.  The
   remaining entries are written to zero-filled extension blocks, up to
   SPARSES_IN_SPARSE_HEADER per block, each flagged isextended when more
   entries follow.

   NOTE(review): the isextended store guarded by the
   SPARSES_IN_OLDGNU_HEADER comparison is overwritten unconditionally a
   few lines later, so it looks redundant.
   NOTE(review): in the extension loop isextended is written after
   set_next_block_after (blk).  Confirm the block is still safe to
   modify at that point.  */
675 oldgnu_dump_header (struct tar_sparse_file *file)
677 off_t block_ordinal = current_block_ordinal ();
681 blk = start_header (file->stat_info);
682 blk->header.typeflag = GNUTYPE_SPARSE;
683 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
684 blk->oldgnu_header.isextended = 1;
686 /* Store the real file size */
687 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
688 /* Store the effective (shrunken) file size */
689 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
692 oldgnu_store_sparse_info (file, &i,
693 blk->oldgnu_header.sp,
694 SPARSES_IN_OLDGNU_HEADER);
695 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
696 finish_header (file->stat_info, blk, block_ordinal);
698 while (i < file->stat_info->sparse_map_avail)
700 blk = find_next_block ();
701 memset (blk->buffer, 0, BLOCKSIZE);
702 oldgnu_store_sparse_info (file, &i,
703 blk->sparse_header.sp,
704 SPARSES_IN_SPARSE_HEADER);
705 set_next_block_after (blk);
706 if (i < file->stat_info->sparse_map_avail)
707 blk->sparse_header.isextended = 1;
/* Operation table for the old GNU sparse format.  The initializers are
   positional and follow the member order of struct tar_sparse_optab.
   Of the entries shown, oldgnu_get_sparse_info therefore lands in the
   decode_header slot and sparse_extract_region in the extract_region
   slot.  */
714 static struct tar_sparse_optab oldgnu_optab = {
715 NULL, /* No init function */
716 NULL, /* No done function */
717 oldgnu_sparse_member_p,
720 oldgnu_get_sparse_info,
721 NULL, /* No scan_block function */
723 sparse_extract_region,
/* sparse_member_p hook for the star format: the member is sparse when
   the current header's typeflag is GNUTYPE_SPARSE.  FILE is not
   consulted.  */
730 star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
732 return current_header->header.typeflag == GNUTYPE_SPARSE;
/* fixup_header hook for the star format.  The size taken from the
   header becomes archive_file_size, and st_size is reloaded from the
   realsize field of the star in-header structure.  */
736 star_fixup_header (struct tar_sparse_file *file)
738 /* NOTE! st_size was initialized from the header
739 which actually contains archived size. The following fixes it */
740 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
741 file->stat_info->stat.st_size =
742 OFF_FROM_HEADER (current_header->star_in_header.realsize);
/* decode_header hook for the star format.  After resetting
   sparse_map_size to 0, the header is checked for the layout the code
   labels "Old star format": an empty prefix field and a non-NUL byte at
   index 10 of the first entry's offset.  In that case the
   SPARSES_IN_STAR_HEADER in-header entries are added with
   oldgnu_add_sparse and the header's isextended flag is read.  While an
   extension is flagged and the status stays add_ok, the next archive
   block is fetched and up to SPARSES_IN_STAR_EXT_HEADER entries are
   added from it.  Running out of blocks is reported as "Unexpected EOF
   in archive", and a bad map as "invalid sparse archive member".  */
746 /* Convert STAR format sparse data to internal representation */
748 star_get_sparse_info (struct tar_sparse_file *file)
751 union block *h = current_header;
753 static enum oldgnu_add_status rc;
755 file->stat_info->sparse_map_size = 0;
757 if (h->star_in_header.prefix[0] == '\0'
758 && h->star_in_header.sp[0].offset[10] != '\0')
760 /* Old star format */
761 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
763 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
767 ext_p = h->star_in_header.isextended;
772 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
774 h = find_next_block ();
777 ERROR ((0, 0, _("Unexpected EOF in archive")));
780 set_next_block_after (h);
781 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
782 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
787 ERROR ((0, 0, _("%s: invalid sparse archive member"),
788 file->stat_info->orig_file_name));
/* Operation table for the star sparse format.  The initializers are
   positional and follow the member order of struct tar_sparse_optab.
   No dump_region hook is provided, and sparse_dump_file writes the
   header and regions only when dump_region is set.  */
795 static struct tar_sparse_optab star_optab = {
796 NULL, /* No init function */
797 NULL, /* No done function */
798 star_sparse_member_p,
801 star_get_sparse_info,
802 NULL, /* No scan_block function */
803 NULL, /* No dump region function */
804 sparse_extract_region,
808 /* GNU PAX sparse file format. The sparse file map is stored in
811 GNU.sparse.size Real size of the stored file
812 GNU.sparse.numblocks Number of blocks in the sparse map
813 repeat numblocks times
814 GNU.sparse.offset Offset of the next data block
815 GNU.sparse.numbytes Size of the next data block
/* sparse_member_p hook for the PAX format: the member is treated as
   sparse when its archived size differs from st_size.  */
820 pax_sparse_member_p (struct tar_sparse_file *file)
822 return file->stat_info->archive_file_size != file->stat_info->stat.st_size;
/* dump_header hook for the PAX format.  The sparse map is recorded
   through xheader_store: GNU.sparse.size and GNU.sparse.numblocks once,
   then GNU.sparse.offset and GNU.sparse.numbytes once per map entry,
   with a pointer to the entry index as the data argument.  A header is
   then started, its size field is set to the archived (shrunken) size,
   and it is completed with finish_header.  */
826 pax_dump_header (struct tar_sparse_file *file)
828 off_t block_ordinal = current_block_ordinal ();
832 /* Store the real file size */
833 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
834 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
835 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
837 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
838 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
841 blk = start_header (file->stat_info);
842 /* Store the effective (shrunken) file size */
843 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
844 finish_header (file->stat_info, blk, block_ordinal);
/* Operation table for the GNU PAX sparse format.  The initializers are
   positional and follow the member order of struct tar_sparse_optab,
   where fixup_header precedes decode_header.  The comments on those two
   null entries are ordered to match.  */
848 static struct tar_sparse_optab pax_optab = {
849 NULL, /* No init function */
850 NULL, /* No done function */
853 NULL, /* No fixup_header function */
854 NULL, /* No decode_header function */
855 NULL, /* No scan_block function */
857 sparse_extract_region,