1 diff --git a/deflate.c b/deflate.c
2 index 0950391..1b3ac52 100644
7 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
10 +# define RSYNC_WIN 8192
12 +/* Size of rsync window, must be < MAX_DIST */
14 +#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0)
15 +/* Whether window sum matches magic value */
17 /* ===========================================================================
18 * Local data used by the "longest match" routines.
20 @@ -212,6 +220,8 @@ local int compr_level;
21 unsigned near good_match;
22 /* Use a faster search when the previous match is longer than this */
24 +local ulg rsync_sum; /* rolling sum of rsync window */
25 +local ulg rsync_chunk_end; /* next rsync sequence point */
27 /* Values for max_lazy_match, good_match and max_chain_length, depending on
28 * the desired pack level (0..9). The values given below have been tuned to
29 @@ -307,6 +317,10 @@ void lm_init (pack_level, flags)
31 /* prev will be initialized on the fly */
34 + rsync_chunk_end = 0xFFFFFFFFUL;
37 /* Set the default configuration parameters:
39 max_lazy_match = configuration_table[pack_level].max_lazy;
40 @@ -324,6 +338,7 @@ void lm_init (pack_level, flags)
44 + rsync_chunk_end = 0xFFFFFFFFUL;
46 match_init(); /* initialize the asm code */
48 @@ -543,6 +558,8 @@ local void fill_window()
49 memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
51 strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
52 + if (rsync_chunk_end != 0xFFFFFFFFUL)
53 + rsync_chunk_end -= WSIZE;
55 block_start -= (long) WSIZE;
57 @@ -570,6 +587,39 @@ local void fill_window()
61 +local void rsync_roll(start, num)
67 + if (start < RSYNC_WIN) {
68 + /* before window fills. */
69 + for (i = start; i < RSYNC_WIN; i++) {
70 + if (i == start + num) return;
71 + rsync_sum += (ulg)window[i];
73 + num -= (RSYNC_WIN - start);
77 + /* buffer after window full */
78 + for (i = start; i < start+num; i++) {
79 + /* New character in */
80 + rsync_sum += (ulg)window[i];
81 + /* Old character out */
82 + rsync_sum -= (ulg)window[i - RSYNC_WIN];
83 + if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
84 + rsync_chunk_end = i;
88 +/* ===========================================================================
89 + * Set rsync_chunk_end if window sum matches magic value.
91 +#define RSYNC_ROLL(s, n) \
92 + do { if (rsync) rsync_roll((s), (n)); } while(0)
94 /* ===========================================================================
95 * Flush the current block, with given end-of-file flag.
96 * IN assertion: strstart is set to the end of the current match.
97 @@ -617,6 +667,7 @@ local off_t deflate_fast()
99 lookahead -= match_length;
101 + RSYNC_ROLL(strstart, match_length);
102 /* Insert new strings in the hash table only if the match length
103 * is not too large. This saves time but degrades compression.
105 @@ -645,9 +696,18 @@ local off_t deflate_fast()
106 /* No match, output a literal byte */
107 Tracevv((stderr,"%c",window[strstart]));
108 flush = ct_tally (0, window[strstart]);
109 + RSYNC_ROLL(strstart, 1);
113 + if (rsync && strstart > rsync_chunk_end) {
114 + ush attr = 0; /* ascii/binary flag */
117 + /* Reset huffman tree */
118 + ct_init(&attr, &method);
119 + rsync_chunk_end = 0xFFFFFFFFUL;
121 if (flush) FLUSH_BLOCK(0), block_start = strstart;
123 /* Make sure that we always have enough lookahead, except
124 @@ -721,6 +781,7 @@ off_t deflate()
126 lookahead -= prev_length-1;
128 + RSYNC_ROLL(strstart, prev_length+1);
131 INSERT_STRING(strstart, hash_head);
132 @@ -733,24 +794,51 @@ off_t deflate()
134 match_length = MIN_MATCH-1;
136 - if (flush) FLUSH_BLOCK(0), block_start = strstart;
138 + if (rsync && strstart > rsync_chunk_end) {
139 + ush attr = 0; /* ascii/binary flag */
141 + /* Reset huffman tree */
142 + ct_init(&attr, &method);
143 + rsync_chunk_end = 0xFFFFFFFFUL;
146 + if (flush) FLUSH_BLOCK(0), block_start = strstart;
147 } else if (match_available) {
148 /* If there was no match at the previous position, output a
149 * single literal. If there was a match but the current match
150 * is longer, truncate the previous match to a single literal.
152 Tracevv((stderr,"%c",window[strstart-1]));
153 - if (ct_tally (0, window[strstart-1])) {
154 - FLUSH_BLOCK(0), block_start = strstart;
156 + flush = ct_tally (0, window[strstart-1]);
157 + if (rsync && strstart > rsync_chunk_end) {
158 + ush attr = 0; /* ascii/binary flag */
160 + /* Reset huffman tree */
161 + ct_init(&attr, &method);
162 + rsync_chunk_end = 0xFFFFFFFFUL;
166 + if (flush) FLUSH_BLOCK(0), block_start = strstart;
167 + RSYNC_ROLL(strstart, 1);
171 /* There is no previous match to compare with, wait for
172 * the next step to decide.
174 + if (rsync && strstart > rsync_chunk_end) {
175 + ush attr = 0; /* ascii/binary flag */
177 + /* Reset huffman tree */
178 + ct_init(&attr, &method);
179 + rsync_chunk_end = 0xFFFFFFFFUL;
181 + FLUSH_BLOCK(0), block_start = strstart;
184 + RSYNC_ROLL(strstart, 1);
188 diff --git a/doc/gzip.texi b/doc/gzip.texi
189 index a6009d2..84887c3 100644
192 @@ -353,6 +353,14 @@ specified on the command line are directories, @command{gzip} will descend
193 into the directory and compress all the files it finds there (or
194 decompress them in the case of @command{gunzip}).
197 +While compressing, synchronize the output occasionally based on the
198 +input. This can reduce the compression slightly in some cases, but
199 +means that the @code{rsync} program can take advantage of similarities
200 +in the uncompressed input when synchronizing two files compressed with
201 +this flag. @code{gunzip} cannot tell the difference between a
202 +compressed file created with this option, and one created without it.
204 @item --suffix @var{suf}
206 Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be
207 diff --git a/gzip.c b/gzip.c
208 index 9e2a890..b867350 100644
211 @@ -218,6 +218,7 @@ int ofd; /* output file descriptor */
212 unsigned insize; /* valid bytes in inbuf */
213 unsigned inptr; /* index of next byte to be processed in inbuf */
214 unsigned outcnt; /* bytes in output buffer */
215 +int rsync = 0; /* make rsyncable chunks */
217 static int handled_sig[] =
219 @@ -271,6 +272,7 @@ struct option longopts[] =
220 {"best", 0, 0, '9'}, /* compress better */
221 {"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */
222 {"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */
223 + {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */
227 @@ -352,6 +354,7 @@ local void help()
228 " -Z, --lzw produce output compatible with old compress",
229 " -b, --bits=BITS max number of bits per code (implies -Z)",
231 + " --rsyncable Make rsync-friendly archive",
233 "With no FILE, or when FILE is -, read standard input.",
235 @@ -479,6 +482,9 @@ int main (int argc, char **argv)
243 #ifdef NO_MULTIPLE_DOTS
244 if (*optarg == '.') optarg++;
245 diff --git a/gzip.h b/gzip.h
246 index 0c3dd68..5270c56 100644
249 @@ -146,6 +146,7 @@ EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */
250 extern unsigned insize; /* valid bytes in inbuf */
251 extern unsigned inptr; /* index of next byte to be processed in inbuf */
252 extern unsigned outcnt; /* bytes in output buffer */
253 +extern int rsync; /* deflate into rsyncable chunks */
255 extern off_t bytes_in; /* number of input bytes */
256 extern off_t bytes_out; /* number of output bytes */
257 diff --git a/doc/gzip.texi b/doc/gzip.texi
258 index 84887c3..ecd4e00 100644
261 @@ -207,6 +207,7 @@ Mandatory arguments to long options are mandatory for short options too.
262 -V, --version display version number
263 -1, --fast compress faster
264 -9, --best compress better
265 + --rsyncable Make rsync-friendly archive
267 With no FILE, or when FILE is -, read standard input.
269 @@ -361,6 +362,14 @@ in the uncompressed input when syncronizing two files compressed with
270 this flag. @code{gunzip} cannot tell the difference between a
271 compressed file created with this option, and one created without it.
274 +While compressing, synchronize the output occasionally based on
275 +the input. This increases size by less than 1 percent in most
276 +cases, but means that the @command{rsync} program can much more efficiently
277 +synchronize files compressed with this flag. @command{gunzip}
278 +cannot tell the difference between a compressed file created
279 +with this option, and one created without it.
281 @item --suffix @var{suf}
283 Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be