From 97f825c37400d5684e096fe57c244e1eb20b509f Mon Sep 17 00:00:00 2001 From: Bdale Garbee Date: Fri, 15 Jul 2011 16:45:35 -0600 Subject: [PATCH] re-merge the --rsyncable patch --- debian/patches/rsyncable.diff | 256 ++++++++++++++++++++++++++++++++++ debian/patches/series | 1 + deflate.c | 96 ++++++++++++- doc/gzip.texi | 8 ++ gzip.c | 6 + gzip.h | 1 + 6 files changed, 364 insertions(+), 4 deletions(-) create mode 100644 debian/patches/rsyncable.diff diff --git a/debian/patches/rsyncable.diff b/debian/patches/rsyncable.diff new file mode 100644 index 0000000..96780f3 --- /dev/null +++ b/debian/patches/rsyncable.diff @@ -0,0 +1,256 @@ +diff --git a/deflate.c b/deflate.c +index 0950391..1b3ac52 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -131,6 +131,14 @@ + #endif + /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + ++#ifndef RSYNC_WIN ++# define RSYNC_WIN 8192 ++#endif ++/* Size of rsync window, must be < MAX_DIST */ ++ ++#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0) ++/* Whether window sum matches magic value */ ++ + /* =========================================================================== + * Local data used by the "longest match" routines. + */ +@@ -212,6 +220,8 @@ local int compr_level; + unsigned near good_match; + /* Use a faster search when the previous match is longer than this */ + ++local ulg rsync_sum; /* rolling sum of rsync window */ ++local ulg rsync_chunk_end; /* next rsync sequence point */ + + /* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to +@@ -307,6 +317,10 @@ void lm_init (pack_level, flags) + #endif + /* prev will be initialized on the fly */ + ++ /* rsync params */ ++ rsync_chunk_end = 0xFFFFFFFFUL; ++ rsync_sum = 0; ++ + /* Set the default configuration parameters: + */ + max_lazy_match = configuration_table[pack_level].max_lazy; +@@ -324,6 +338,7 @@ void lm_init (pack_level, flags) + + strstart = 0; + block_start = 0L; ++ rsync_chunk_end = 0xFFFFFFFFUL; + #ifdef ASMV + match_init(); /* initialize the asm code */ + #endif +@@ -543,6 +558,8 @@ local void fill_window() + memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE); + match_start -= WSIZE; + strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ ++ if (rsync_chunk_end != 0xFFFFFFFFUL) ++ rsync_chunk_end -= WSIZE; + + block_start -= (long) WSIZE; + +@@ -570,6 +587,39 @@ local void fill_window() + } + } + ++local void rsync_roll(start, num) ++ unsigned start; ++ unsigned num; ++{ ++ unsigned i; ++ ++ if (start < RSYNC_WIN) { ++ /* before window fills. */ ++ for (i = start; i < RSYNC_WIN; i++) { ++ if (i == start + num) return; ++ rsync_sum += (ulg)window[i]; ++ } ++ num -= (RSYNC_WIN - start); ++ start = RSYNC_WIN; ++ } ++ ++ /* buffer after window full */ ++ for (i = start; i < start+num; i++) { ++ /* New character in */ ++ rsync_sum += (ulg)window[i]; ++ /* Old character out */ ++ rsync_sum -= (ulg)window[i - RSYNC_WIN]; ++ if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum)) ++ rsync_chunk_end = i; ++ } ++} ++ ++/* =========================================================================== ++ * Set rsync_chunk_end if window sum matches magic value. ++ */ ++#define RSYNC_ROLL(s, n) \ ++ do { if (rsync) rsync_roll((s), (n)); } while(0) ++ + /* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. +@@ -617,6 +667,7 @@ local off_t deflate_fast() + + lookahead -= match_length; + ++ RSYNC_ROLL(strstart, match_length); + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +@@ -645,9 +696,18 @@ local off_t deflate_fast() + /* No match, output a literal byte */ + Tracevv((stderr,"%c",window[strstart])); + flush = ct_tally (0, window[strstart]); ++ RSYNC_ROLL(strstart, 1); + lookahead--; + strstart++; + } ++ if (rsync && strstart > rsync_chunk_end) { ++ ush attr = 0; /* ascii/binary flag */ ++ ++ flush = 1; ++ /* Reset huffman tree */ ++ ct_init(&attr, &method); ++ rsync_chunk_end = 0xFFFFFFFFUL; ++ } + if (flush) FLUSH_BLOCK(0), block_start = strstart; + + /* Make sure that we always have enough lookahead, except +@@ -721,6 +781,7 @@ off_t deflate() + */ + lookahead -= prev_length-1; + prev_length -= 2; ++ RSYNC_ROLL(strstart, prev_length+1); + do { + strstart++; + INSERT_STRING(strstart, hash_head); +@@ -733,24 +794,51 @@ off_t deflate() + match_available = 0; + match_length = MIN_MATCH-1; + strstart++; +- if (flush) FLUSH_BLOCK(0), block_start = strstart; + ++ if (rsync && strstart > rsync_chunk_end) { ++ ush attr = 0; /* ascii/binary flag */ ++ ++ /* Reset huffman tree */ ++ ct_init(&attr, &method); ++ rsync_chunk_end = 0xFFFFFFFFUL; ++ flush = 1; ++ } ++ if (flush) FLUSH_BLOCK(0), block_start = strstart; + } else if (match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c",window[strstart-1])); +- if (ct_tally (0, window[strstart-1])) { +- FLUSH_BLOCK(0), block_start = strstart; +- } ++ flush = ct_tally (0, window[strstart-1]); ++ if (rsync && strstart > rsync_chunk_end) { ++ ush attr = 0; /* ascii/binary flag */ ++ ++ /* Reset huffman tree */ ++ ct_init(&attr, &method); ++ rsync_chunk_end = 0xFFFFFFFFUL; ++ ++ flush = 1; ++ } ++ if (flush) FLUSH_BLOCK(0), block_start = strstart; ++ RSYNC_ROLL(strstart, 1); + strstart++; + lookahead--; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ ++ if (rsync && strstart > rsync_chunk_end) { ++ ush attr = 0; /* ascii/binary flag */ ++ ++ /* Reset huffman tree */ ++ ct_init(&attr, &method); ++ rsync_chunk_end = 0xFFFFFFFFUL; ++ ++ FLUSH_BLOCK(0), block_start = strstart; ++ } + match_available = 1; ++ RSYNC_ROLL(strstart, 1); + strstart++; + lookahead--; + } +diff --git a/doc/gzip.texi b/doc/gzip.texi +index a6009d2..84887c3 100644 +--- a/doc/gzip.texi ++++ b/doc/gzip.texi +@@ -353,6 +353,14 @@ specified on the command line are directories, @command{gzip} will descend + into the directory and compress all the files it finds there (or + decompress them in the case of @command{gunzip}). + ++@item --rsyncable ++While compressing, synchronize the output occasionally based on the ++input. This can reduce the compression slightly in some cases, but ++means that the @code{rsync} program can take advantage of similarities ++in the uncompressed input when syncronizing two files compressed with ++this flag. @code{gunzip} cannot tell the difference between a ++compressed file created with this option, and one created without it. ++ + @item --suffix @var{suf} + @itemx -S @var{suf} + Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be +diff --git a/gzip.c b/gzip.c +index 9e2a890..b867350 100644 +--- a/gzip.c ++++ b/gzip.c +@@ -218,6 +218,7 @@ int ofd; /* output file descriptor */ + unsigned insize; /* valid bytes in inbuf */ + unsigned inptr; /* index of next byte to be processed in inbuf */ + unsigned outcnt; /* bytes in output buffer */ ++int rsync = 0; /* make ryncable chunks */ + + static int handled_sig[] = + { +@@ -271,6 +272,7 @@ struct option longopts[] = + {"best", 0, 0, '9'}, /* compress better */ + {"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */ + {"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */ ++ {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */ + { 0, 0, 0, 0 } + }; + +@@ -352,6 +354,7 @@ local void help() + " -Z, --lzw produce output compatible with old compress", + " -b, --bits=BITS max number of bits per code (implies -Z)", + #endif ++ " --rsyncable Make rsync-friendly archive", + "", + "With no FILE, or when FILE is -, read standard input.", + "", +@@ -479,6 +482,9 @@ int main (int argc, char **argv) + recursive = 1; + #endif + break; ++ case 'R': ++ rsync = 1; break; ++ + case 'S': + #ifdef NO_MULTIPLE_DOTS + if (*optarg == '.') optarg++; +diff --git a/gzip.h b/gzip.h +index 0c3dd68..5270c56 100644 +--- a/gzip.h ++++ b/gzip.h +@@ -146,6 +146,7 @@ EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */ + extern unsigned insize; /* valid bytes in inbuf */ + extern unsigned inptr; /* index of next byte to be processed in inbuf */ + extern unsigned outcnt; /* bytes in output buffer */ ++extern int rsync; /* deflate into rsyncable chunks */ + + extern off_t bytes_in; /* number of input bytes */ + extern off_t bytes_out; /* number of output bytes */ diff --git a/debian/patches/series b/debian/patches/series index f008915..0c9a1ea 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,2 +1,3 @@ getdtablesize-missing.diff +rsyncable.diff zless-LESSOPEN.diff diff --git a/deflate.c b/deflate.c index 0950391..1b3ac52 100644 --- a/deflate.c +++ b/deflate.c @@ -131,6 +131,14 @@ #endif /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ +#ifndef RSYNC_WIN +# define RSYNC_WIN 8192 +#endif +/* Size of rsync window, must be < MAX_DIST */ + +#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0) +/* Whether window sum matches magic value */ + /* =========================================================================== * Local data used by the "longest match" routines. */ @@ -212,6 +220,8 @@ local int compr_level; unsigned near good_match; /* Use a faster search when the previous match is longer than this */ +local ulg rsync_sum; /* rolling sum of rsync window */ +local ulg rsync_chunk_end; /* next rsync sequence point */ /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to @@ -307,6 +317,10 @@ void lm_init (pack_level, flags) #endif /* prev will be initialized on the fly */ + /* rsync params */ + rsync_chunk_end = 0xFFFFFFFFUL; + rsync_sum = 0; + /* Set the default configuration parameters: */ max_lazy_match = configuration_table[pack_level].max_lazy; @@ -324,6 +338,7 @@ void lm_init (pack_level, flags) strstart = 0; block_start = 0L; + rsync_chunk_end = 0xFFFFFFFFUL; #ifdef ASMV match_init(); /* initialize the asm code */ #endif @@ -543,6 +558,8 @@ local void fill_window() memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE); match_start -= WSIZE; strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ + if (rsync_chunk_end != 0xFFFFFFFFUL) + rsync_chunk_end -= WSIZE; block_start -= (long) WSIZE; @@ -570,6 +587,39 @@ local void fill_window() } } +local void rsync_roll(start, num) + unsigned start; + unsigned num; +{ + unsigned i; + + if (start < RSYNC_WIN) { + /* before window fills. */ + for (i = start; i < RSYNC_WIN; i++) { + if (i == start + num) return; + rsync_sum += (ulg)window[i]; + } + num -= (RSYNC_WIN - start); + start = RSYNC_WIN; + } + + /* buffer after window full */ + for (i = start; i < start+num; i++) { + /* New character in */ + rsync_sum += (ulg)window[i]; + /* Old character out */ + rsync_sum -= (ulg)window[i - RSYNC_WIN]; + if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum)) + rsync_chunk_end = i; + } +} + +/* =========================================================================== + * Set rsync_chunk_end if window sum matches magic value. + */ +#define RSYNC_ROLL(s, n) \ + do { if (rsync) rsync_roll((s), (n)); } while(0) + /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. @@ -617,6 +667,7 @@ local off_t deflate_fast() lookahead -= match_length; + RSYNC_ROLL(strstart, match_length); /* Insert new strings in the hash table only if the match length * is not too large. This saves time but degrades compression. */ @@ -645,9 +696,18 @@ local off_t deflate_fast() /* No match, output a literal byte */ Tracevv((stderr,"%c",window[strstart])); flush = ct_tally (0, window[strstart]); + RSYNC_ROLL(strstart, 1); lookahead--; strstart++; } + if (rsync && strstart > rsync_chunk_end) { + ush attr = 0; /* ascii/binary flag */ + + flush = 1; + /* Reset huffman tree */ + ct_init(&attr, &method); + rsync_chunk_end = 0xFFFFFFFFUL; + } if (flush) FLUSH_BLOCK(0), block_start = strstart; /* Make sure that we always have enough lookahead, except @@ -721,6 +781,7 @@ off_t deflate() */ lookahead -= prev_length-1; prev_length -= 2; + RSYNC_ROLL(strstart, prev_length+1); do { strstart++; INSERT_STRING(strstart, hash_head); @@ -733,24 +794,51 @@ off_t deflate() match_available = 0; match_length = MIN_MATCH-1; strstart++; - if (flush) FLUSH_BLOCK(0), block_start = strstart; + if (rsync && strstart > rsync_chunk_end) { + ush attr = 0; /* ascii/binary flag */ + + /* Reset huffman tree */ + ct_init(&attr, &method); + rsync_chunk_end = 0xFFFFFFFFUL; + flush = 1; + } + if (flush) FLUSH_BLOCK(0), block_start = strstart; } else if (match_available) { /* If there was no match at the previous position, output a * single literal. If there was a match but the current match * is longer, truncate the previous match to a single literal. */ Tracevv((stderr,"%c",window[strstart-1])); - if (ct_tally (0, window[strstart-1])) { - FLUSH_BLOCK(0), block_start = strstart; - } + flush = ct_tally (0, window[strstart-1]); + if (rsync && strstart > rsync_chunk_end) { + ush attr = 0; /* ascii/binary flag */ + + /* Reset huffman tree */ + ct_init(&attr, &method); + rsync_chunk_end = 0xFFFFFFFFUL; + + flush = 1; + } + if (flush) FLUSH_BLOCK(0), block_start = strstart; + RSYNC_ROLL(strstart, 1); strstart++; lookahead--; } else { /* There is no previous match to compare with, wait for * the next step to decide. */ + if (rsync && strstart > rsync_chunk_end) { + ush attr = 0; /* ascii/binary flag */ + + /* Reset huffman tree */ + ct_init(&attr, &method); + rsync_chunk_end = 0xFFFFFFFFUL; + + FLUSH_BLOCK(0), block_start = strstart; + } match_available = 1; + RSYNC_ROLL(strstart, 1); strstart++; lookahead--; } diff --git a/doc/gzip.texi b/doc/gzip.texi index a6009d2..84887c3 100644 --- a/doc/gzip.texi +++ b/doc/gzip.texi @@ -353,6 +353,14 @@ specified on the command line are directories, @command{gzip} will descend into the directory and compress all the files it finds there (or decompress them in the case of @command{gunzip}). +@item --rsyncable +While compressing, synchronize the output occasionally based on the +input. This can reduce the compression slightly in some cases, but +means that the @code{rsync} program can take advantage of similarities +in the uncompressed input when syncronizing two files compressed with +this flag. @code{gunzip} cannot tell the difference between a +compressed file created with this option, and one created without it. + @item --suffix @var{suf} @itemx -S @var{suf} Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be diff --git a/gzip.c b/gzip.c index 9e2a890..b867350 100644 --- a/gzip.c +++ b/gzip.c @@ -218,6 +218,7 @@ int ofd; /* output file descriptor */ unsigned insize; /* valid bytes in inbuf */ unsigned inptr; /* index of next byte to be processed in inbuf */ unsigned outcnt; /* bytes in output buffer */ +int rsync = 0; /* make ryncable chunks */ static int handled_sig[] = { @@ -271,6 +272,7 @@ struct option longopts[] = {"best", 0, 0, '9'}, /* compress better */ {"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */ {"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */ + {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */ { 0, 0, 0, 0 } }; @@ -352,6 +354,7 @@ local void help() " -Z, --lzw produce output compatible with old compress", " -b, --bits=BITS max number of bits per code (implies -Z)", #endif + " --rsyncable Make rsync-friendly archive", "", "With no FILE, or when FILE is -, read standard input.", "", @@ -479,6 +482,9 @@ int main (int argc, char **argv) recursive = 1; #endif break; + case 'R': + rsync = 1; break; + case 'S': #ifdef NO_MULTIPLE_DOTS if (*optarg == '.') optarg++; diff --git a/gzip.h b/gzip.h index 0c3dd68..5270c56 100644 --- a/gzip.h +++ b/gzip.h @@ -146,6 +146,7 @@ EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */ extern unsigned insize; /* valid bytes in inbuf */ extern unsigned inptr; /* index of next byte to be processed in inbuf */ extern unsigned outcnt; /* bytes in output buffer */ +extern int rsync; /* deflate into rsyncable chunks */ extern off_t bytes_in; /* number of input bytes */ extern off_t bytes_out; /* number of output bytes */ -- 2.30.2