From afe7cf78d51b819dcdc5b0f4cb85a25a52a9fcd0 Mon Sep 17 00:00:00 2001
From: Mark Adler
Date: Wed, 7 Dec 2011 23:57:37 -0800
Subject: [PATCH] Enable dictionary setting in middle of stream, and keeping the dictionary.

This patch adds the deflateResetKeep() function to retain the sliding
window for the next deflate operation, and fixes an inflateResetKeep()
problem that came from inflate() not updating the window when the
stream completed. This enables constructing and decompressing a series
of concatenated deflate streams where each can depend on the history of
uncompressed data that precedes it.

This generalizes deflateSetDictionary() and inflateSetDictionary() to
permit setting the dictionary in the middle of a stream for raw deflate
and inflate. This in combination with the Keep functions enables a
scheme for updating files block by block with the transmission of
compressed data, where blocks are sent with deflateResetKeep() to
retain history for better compression, and deflateSetDictionary() is
used for blocks already present at the receiver to skip compression but
insert that data in the history, again for better compression. The
corresponding inflate calls are done on the receiver side.
--- as400/bndsrc | 1 + as400/zlib.inc | 4 ++ contrib/vstudio/vc10/zlibvc.def | 1 + contrib/vstudio/vc9/zlibvc.def | 1 + deflate.c | 90 +++++++++++++++++++++++---------- inflate.c | 27 +++++----- win32/zlib.def | 1 + zconf.h | 1 + zconf.h.cmakein | 1 + zconf.h.in | 1 + zlib.h | 27 ++++++---- zlib.map | 4 ++ 12 files changed, 109 insertions(+), 50 deletions(-) diff --git a/as400/bndsrc b/as400/bndsrc index 036cd63a..3e26283d 100644 --- a/as400/bndsrc +++ b/as400/bndsrc @@ -33,6 +33,7 @@ STRPGMEXP PGMLVL(*CURRENT) SIGNATURE('ZLIB') EXPORT SYMBOL("deflateSetDictionary") EXPORT SYMBOL("deflateCopy") EXPORT SYMBOL("deflateReset") + EXPORT SYMBOL("deflateResetKeep") EXPORT SYMBOL("deflateParams") EXPORT SYMBOL("deflatePending") EXPORT SYMBOL("deflatePrime") diff --git a/as400/zlib.inc b/as400/zlib.inc index 976dca2a..f0915c82 100644 --- a/as400/zlib.inc +++ b/as400/zlib.inc @@ -433,6 +433,10 @@ * D inflateResetKeep... D PR 10I 0 extproc('inflateResetKeep') End and init. stream + D strm like(z_stream) Expansion stream + * + D deflateResetKeep... + D PR 10I 0 extproc('deflateResetKeep') End and init. 
stream D strm like(z_stream) Expansion stream * D gzflags PR 10U 0 extproc('gzflags') diff --git a/contrib/vstudio/vc10/zlibvc.def b/contrib/vstudio/vc10/zlibvc.def index d6ab1c1b..55218854 100644 --- a/contrib/vstudio/vc10/zlibvc.def +++ b/contrib/vstudio/vc10/zlibvc.def @@ -133,3 +133,4 @@ EXPORTS gzgetc_ @30 gzflags @162 inflateResetKeep @163 + deflateResetKeep @164 diff --git a/contrib/vstudio/vc9/zlibvc.def b/contrib/vstudio/vc9/zlibvc.def index d6ab1c1b..55218854 100644 --- a/contrib/vstudio/vc9/zlibvc.def +++ b/contrib/vstudio/vc9/zlibvc.def @@ -133,3 +133,4 @@ EXPORTS gzgetc_ @30 gzflags @162 inflateResetKeep @163 + deflateResetKeep @164 diff --git a/deflate.c b/deflate.c index 4b8e91bb..096207d6 100644 --- a/deflate.c +++ b/deflate.c @@ -323,43 +323,68 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) uInt dictLength; { deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; + uInt str, n; + int wrap; + unsigned avail; + unsigned char *next; - if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || - strm->state->wrap == 2 || - (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL) return Z_STREAM_ERROR; - s = strm->state; - if (s->wrap) - strm->adler = adler32(strm->adler, dictionary, dictLength); + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; - if (length < MIN_MATCH) return Z_OK; - if (length > s->w_size) { - length = s->w_size; - dictionary += dictLength - length; /* use the tail of the dictionary */ + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise 
*/ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; } - zmemcpy(s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. - */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); } - if (hash_head) hash_head = 0; /* to make compiler happy */ + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflateReset (strm) +int ZEXPORT deflateResetKeep (strm) z_streamp strm; { deflate_state *s; @@ -389,11 +414,22 @@ int ZEXPORT deflateReset (strm) s->last_flush = Z_NO_FLUSH; _tr_init(s); - lm_init(s); return Z_OK; } +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + /* 
========================================================================= */ int ZEXPORT deflateSetHeader (strm, head) z_streamp strm; diff --git a/inflate.c b/inflate.c index 6b0ebbfb..6832b8b7 100644 --- a/inflate.c +++ b/inflate.c @@ -1232,7 +1232,7 @@ int flush; */ inf_leave: RESTORE(); - if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) + if (state->wsize || (state->mode < BAD && out != strm->avail_out)) if (updatewindow(strm, out)) { state->mode = MEM; return Z_MEM_ERROR; @@ -1274,6 +1274,9 @@ uInt dictLength; { struct inflate_state FAR *state; unsigned long id; + unsigned char *next; + unsigned avail; + int ret; /* check state */ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; @@ -1289,21 +1292,19 @@ uInt dictLength; return Z_DATA_ERROR; } - /* copy dictionary to window */ - if (updatewindow(strm, strm->avail_out)) { + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + next = strm->next_out; + avail = strm->avail_out; + strm->next_out = (Bytef *)dictionary + dictLength; + strm->avail_out = 0; + ret = updatewindow(strm, dictLength); + strm->avail_out = avail; + strm->next_out = next; + if (ret) { state->mode = MEM; return Z_MEM_ERROR; } - if (dictLength > state->wsize) { - zmemcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - zmemcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; diff --git a/win32/zlib.def b/win32/zlib.def index c420d8b9..21bff1f8 100644 --- a/win32/zlib.def +++ b/win32/zlib.def @@ -78,5 +78,6 @@ EXPORTS get_crc_table inflateUndermine inflateResetKeep + deflateResetKeep gzgetc_ gzflags diff --git a/zconf.h b/zconf.h index 84ffb32b..51c80ac1 100644 --- a/zconf.h +++ b/zconf.h @@ -46,6 +46,7 @@ # define deflatePending z_deflatePending # 
define deflatePrime z_deflatePrime # define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep # define deflateSetDictionary z_deflateSetDictionary # define deflateSetHeader z_deflateSetHeader # define deflateTune z_deflateTune diff --git a/zconf.h.cmakein b/zconf.h.cmakein index b2d78b3a..3ea5531d 100644 --- a/zconf.h.cmakein +++ b/zconf.h.cmakein @@ -48,6 +48,7 @@ # define deflatePending z_deflatePending # define deflatePrime z_deflatePrime # define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep # define deflateSetDictionary z_deflateSetDictionary # define deflateSetHeader z_deflateSetHeader # define deflateTune z_deflateTune diff --git a/zconf.h.in b/zconf.h.in index 84ffb32b..51c80ac1 100644 --- a/zconf.h.in +++ b/zconf.h.in @@ -46,6 +46,7 @@ # define deflatePending z_deflatePending # define deflatePrime z_deflatePrime # define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep # define deflateSetDictionary z_deflateSetDictionary # define deflateSetHeader z_deflateSetHeader # define deflateTune z_deflateTune diff --git a/zlib.h b/zlib.h index 3121b0a7..3669f2e2 100644 --- a/zlib.h +++ b/zlib.h @@ -581,10 +581,15 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, uInt dictLength)); /* Initializes the compression dictionary from the given byte sequence - without producing any compressed output. This function must be called - immediately after deflateInit, deflateInit2 or deflateReset, before any call - of deflate. The compressor and decompressor must use exactly the same - dictionary (see inflateSetDictionary). + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. 
after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). The dictionary should consist of strings (byte sequences) that are likely to be encountered later in the data to be compressed, with the most commonly @@ -611,8 +616,8 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is inconsistent (for example if deflate has already been called for this stream - or if the compression method is bsort). deflateSetDictionary does not - perform any compression: this will be done by deflate(). + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). */ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, @@ -810,10 +815,11 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, if that call returned Z_NEED_DICT. The dictionary chosen by the compressor can be determined from the adler32 value returned by that call of inflate. The compressor and decompressor must use exactly the same dictionary (see - deflateSetDictionary). For raw inflate, this function can be called - immediately after inflateInit2() or inflateReset() and before any call of - inflate() to set the dictionary. The application must insure that the - dictionary that was used for compression is provided. + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. 
inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is @@ -1694,6 +1700,7 @@ ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); #ifndef Z_SOLO ZEXTERN unsigned long ZEXPORT gzflags OF((void)); #endif diff --git a/zlib.map b/zlib.map index dd27591b..80c4774f 100644 --- a/zlib.map +++ b/zlib.map @@ -76,3 +76,7 @@ ZLIB_1.2.5.2 { gzgetc_; inflateResetKeep; } ZLIB_1.2.5.1; + +ZLIB_1.2.5.3 { + deflateResetKeep; +} ZLIB_1.2.5.2;