--- /dev/null
+diff --git a/deflate.c b/deflate.c
+index 0950391..1b3ac52 100644
+--- a/deflate.c
++++ b/deflate.c
+@@ -131,6 +131,14 @@
+ #endif
+ /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
++#ifndef RSYNC_WIN
++# define RSYNC_WIN 8192
++#endif
++/* Size of rsync window, must be < MAX_DIST */
++
++#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0)
++/* Whether window sum matches magic value */
++
+ /* ===========================================================================
+ * Local data used by the "longest match" routines.
+ */
+@@ -212,6 +220,8 @@ local int compr_level;
+ unsigned near good_match;
+ /* Use a faster search when the previous match is longer than this */
+
++local ulg rsync_sum; /* rolling sum of rsync window */
++local ulg rsync_chunk_end; /* next rsync sequence point */
+
+ /* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+@@ -307,6 +317,10 @@ void lm_init (pack_level, flags)
+ #endif
+ /* prev will be initialized on the fly */
+
++ /* rsync params */
++ rsync_chunk_end = 0xFFFFFFFFUL;
++ rsync_sum = 0;
++
+ /* Set the default configuration parameters:
+ */
+ max_lazy_match = configuration_table[pack_level].max_lazy;
+@@ -324,6 +338,7 @@ void lm_init (pack_level, flags)
+
+ strstart = 0;
+ block_start = 0L;
++ rsync_chunk_end = 0xFFFFFFFFUL;
+ #ifdef ASMV
+ match_init(); /* initialize the asm code */
+ #endif
+@@ -543,6 +558,8 @@ local void fill_window()
+ memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
+ match_start -= WSIZE;
+ strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
++ if (rsync_chunk_end != 0xFFFFFFFFUL)
++ rsync_chunk_end -= WSIZE;
+
+ block_start -= (long) WSIZE;
+
+@@ -570,6 +587,39 @@ local void fill_window()
+ }
+ }
+
++local void rsync_roll(start, num)
++ unsigned start;
++ unsigned num;
++{
++ unsigned i;
++
++ if (start < RSYNC_WIN) {
++ /* before window fills. */
++ for (i = start; i < RSYNC_WIN; i++) {
++ if (i == start + num) return;
++ rsync_sum += (ulg)window[i];
++ }
++ num -= (RSYNC_WIN - start);
++ start = RSYNC_WIN;
++ }
++
++ /* buffer after window full */
++ for (i = start; i < start+num; i++) {
++ /* New character in */
++ rsync_sum += (ulg)window[i];
++ /* Old character out */
++ rsync_sum -= (ulg)window[i - RSYNC_WIN];
++ if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
++ rsync_chunk_end = i;
++ }
++}
++
++/* ===========================================================================
++ * Set rsync_chunk_end if window sum matches magic value.
++ */
++#define RSYNC_ROLL(s, n) \
++ do { if (rsync) rsync_roll((s), (n)); } while(0)
++
+ /* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+@@ -617,6 +667,7 @@ local off_t deflate_fast()
+
+ lookahead -= match_length;
+
++ RSYNC_ROLL(strstart, match_length);
+ /* Insert new strings in the hash table only if the match length
+ * is not too large. This saves time but degrades compression.
+ */
+@@ -645,9 +696,18 @@ local off_t deflate_fast()
+ /* No match, output a literal byte */
+ Tracevv((stderr,"%c",window[strstart]));
+ flush = ct_tally (0, window[strstart]);
++ RSYNC_ROLL(strstart, 1);
+ lookahead--;
+ strstart++;
+ }
++ if (rsync && strstart > rsync_chunk_end) {
++ ush attr = 0; /* ascii/binary flag */
++
++ flush = 1;
++ /* Reset huffman tree */
++ ct_init(&attr, &method);
++ rsync_chunk_end = 0xFFFFFFFFUL;
++ }
+ if (flush) FLUSH_BLOCK(0), block_start = strstart;
+
+ /* Make sure that we always have enough lookahead, except
+@@ -721,6 +781,7 @@ off_t deflate()
+ */
+ lookahead -= prev_length-1;
+ prev_length -= 2;
++ RSYNC_ROLL(strstart, prev_length+1);
+ do {
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+@@ -733,24 +794,51 @@ off_t deflate()
+ match_available = 0;
+ match_length = MIN_MATCH-1;
+ strstart++;
+- if (flush) FLUSH_BLOCK(0), block_start = strstart;
+
++ if (rsync && strstart > rsync_chunk_end) {
++ ush attr = 0; /* ascii/binary flag */
++
++ /* Reset huffman tree */
++ ct_init(&attr, &method);
++ rsync_chunk_end = 0xFFFFFFFFUL;
++ flush = 1;
++ }
++ if (flush) FLUSH_BLOCK(0), block_start = strstart;
+ } else if (match_available) {
+ /* If there was no match at the previous position, output a
+ * single literal. If there was a match but the current match
+ * is longer, truncate the previous match to a single literal.
+ */
+ Tracevv((stderr,"%c",window[strstart-1]));
+- if (ct_tally (0, window[strstart-1])) {
+- FLUSH_BLOCK(0), block_start = strstart;
+- }
++ flush = ct_tally (0, window[strstart-1]);
++ if (rsync && strstart > rsync_chunk_end) {
++ ush attr = 0; /* ascii/binary flag */
++
++ /* Reset huffman tree */
++ ct_init(&attr, &method);
++ rsync_chunk_end = 0xFFFFFFFFUL;
++
++ flush = 1;
++ }
++ if (flush) FLUSH_BLOCK(0), block_start = strstart;
++ RSYNC_ROLL(strstart, 1);
+ strstart++;
+ lookahead--;
+ } else {
+ /* There is no previous match to compare with, wait for
+ * the next step to decide.
+ */
++ if (rsync && strstart > rsync_chunk_end) {
++ ush attr = 0; /* ascii/binary flag */
++
++ /* Reset huffman tree */
++ ct_init(&attr, &method);
++ rsync_chunk_end = 0xFFFFFFFFUL;
++
++ FLUSH_BLOCK(0), block_start = strstart;
++ }
+ match_available = 1;
++ RSYNC_ROLL(strstart, 1);
+ strstart++;
+ lookahead--;
+ }
+diff --git a/doc/gzip.texi b/doc/gzip.texi
+index a6009d2..84887c3 100644
+--- a/doc/gzip.texi
++++ b/doc/gzip.texi
+@@ -353,6 +353,14 @@ specified on the command line are directories, @command{gzip} will descend
+ into the directory and compress all the files it finds there (or
+ decompress them in the case of @command{gunzip}).
+
++@item --rsyncable
++While compressing, synchronize the output occasionally based on the
++input. This can reduce the compression slightly in some cases, but
++means that the @code{rsync} program can take advantage of similarities
++in the uncompressed input when syncronizing two files compressed with
++this flag. @code{gunzip} cannot tell the difference between a
++compressed file created with this option, and one created without it.
++
+ @item --suffix @var{suf}
+ @itemx -S @var{suf}
+ Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be
+diff --git a/gzip.c b/gzip.c
+index 9e2a890..b867350 100644
+--- a/gzip.c
++++ b/gzip.c
+@@ -218,6 +218,7 @@ int ofd; /* output file descriptor */
+ unsigned insize; /* valid bytes in inbuf */
+ unsigned inptr; /* index of next byte to be processed in inbuf */
+ unsigned outcnt; /* bytes in output buffer */
++int rsync = 0; /* make ryncable chunks */
+
+ static int handled_sig[] =
+ {
+@@ -271,6 +272,7 @@ struct option longopts[] =
+ {"best", 0, 0, '9'}, /* compress better */
+ {"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */
+ {"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */
++ {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */
+ { 0, 0, 0, 0 }
+ };
+
+@@ -352,6 +354,7 @@ local void help()
+ " -Z, --lzw produce output compatible with old compress",
+ " -b, --bits=BITS max number of bits per code (implies -Z)",
+ #endif
++ " --rsyncable Make rsync-friendly archive",
+ "",
+ "With no FILE, or when FILE is -, read standard input.",
+ "",
+@@ -479,6 +482,9 @@ int main (int argc, char **argv)
+ recursive = 1;
+ #endif
+ break;
++ case 'R':
++ rsync = 1; break;
++
+ case 'S':
+ #ifdef NO_MULTIPLE_DOTS
+ if (*optarg == '.') optarg++;
+diff --git a/gzip.h b/gzip.h
+index 0c3dd68..5270c56 100644
+--- a/gzip.h
++++ b/gzip.h
+@@ -146,6 +146,7 @@ EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */
+ extern unsigned insize; /* valid bytes in inbuf */
+ extern unsigned inptr; /* index of next byte to be processed in inbuf */
+ extern unsigned outcnt; /* bytes in output buffer */
++extern int rsync; /* deflate into rsyncable chunks */
+
+ extern off_t bytes_in; /* number of input bytes */
+ extern off_t bytes_out; /* number of output bytes */
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+#ifndef RSYNC_WIN
+# define RSYNC_WIN 8192
+#endif
+/* Size of rsync window, must be < MAX_DIST */
+
+#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0)
+/* Whether window sum matches magic value */
+
/* ===========================================================================
* Local data used by the "longest match" routines.
*/
unsigned near good_match;
/* Use a faster search when the previous match is longer than this */
+local ulg rsync_sum; /* rolling sum of rsync window */
+local ulg rsync_chunk_end; /* next rsync sequence point */
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
#endif
/* prev will be initialized on the fly */
+ /* rsync params */
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ rsync_sum = 0;
+
/* Set the default configuration parameters:
*/
max_lazy_match = configuration_table[pack_level].max_lazy;
strstart = 0;
block_start = 0L;
+ rsync_chunk_end = 0xFFFFFFFFUL;
#ifdef ASMV
match_init(); /* initialize the asm code */
#endif
memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
match_start -= WSIZE;
strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
+ if (rsync_chunk_end != 0xFFFFFFFFUL)
+ rsync_chunk_end -= WSIZE;
block_start -= (long) WSIZE;
}
}
+local void rsync_roll(start, num)
+ unsigned start;
+ unsigned num;
+{
+ unsigned i;
+
+ if (start < RSYNC_WIN) {
+ /* before window fills. */
+ for (i = start; i < RSYNC_WIN; i++) {
+ if (i == start + num) return;
+ rsync_sum += (ulg)window[i];
+ }
+ num -= (RSYNC_WIN - start);
+ start = RSYNC_WIN;
+ }
+
+ /* buffer after window full */
+ for (i = start; i < start+num; i++) {
+ /* New character in */
+ rsync_sum += (ulg)window[i];
+ /* Old character out */
+ rsync_sum -= (ulg)window[i - RSYNC_WIN];
+ if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
+ rsync_chunk_end = i;
+ }
+}
+
+/* ===========================================================================
+ * Set rsync_chunk_end if window sum matches magic value.
+ */
+#define RSYNC_ROLL(s, n) \
+ do { if (rsync) rsync_roll((s), (n)); } while(0)
+
/* ===========================================================================
* Flush the current block, with given end-of-file flag.
* IN assertion: strstart is set to the end of the current match.
lookahead -= match_length;
+ RSYNC_ROLL(strstart, match_length);
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
/* No match, output a literal byte */
Tracevv((stderr,"%c",window[strstart]));
flush = ct_tally (0, window[strstart]);
+ RSYNC_ROLL(strstart, 1);
lookahead--;
strstart++;
}
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ flush = 1;
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ }
if (flush) FLUSH_BLOCK(0), block_start = strstart;
/* Make sure that we always have enough lookahead, except
*/
lookahead -= prev_length-1;
prev_length -= 2;
+ RSYNC_ROLL(strstart, prev_length+1);
do {
strstart++;
INSERT_STRING(strstart, hash_head);
match_available = 0;
match_length = MIN_MATCH-1;
strstart++;
- if (flush) FLUSH_BLOCK(0), block_start = strstart;
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ flush = 1;
+ }
+ if (flush) FLUSH_BLOCK(0), block_start = strstart;
} else if (match_available) {
/* If there was no match at the previous position, output a
* single literal. If there was a match but the current match
* is longer, truncate the previous match to a single literal.
*/
Tracevv((stderr,"%c",window[strstart-1]));
- if (ct_tally (0, window[strstart-1])) {
- FLUSH_BLOCK(0), block_start = strstart;
- }
+ flush = ct_tally (0, window[strstart-1]);
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+
+ flush = 1;
+ }
+ if (flush) FLUSH_BLOCK(0), block_start = strstart;
+ RSYNC_ROLL(strstart, 1);
strstart++;
lookahead--;
} else {
/* There is no previous match to compare with, wait for
* the next step to decide.
*/
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+
+ FLUSH_BLOCK(0), block_start = strstart;
+ }
match_available = 1;
+ RSYNC_ROLL(strstart, 1);
strstart++;
lookahead--;
}