parent
9811b53dd9
commit
0484693e17
39 changed files with 2178 additions and 177 deletions
@ -0,0 +1,29 @@ |
||||
This directory contains examples of the use of zlib. |
||||
|
||||
fitblk.c |
||||
compress just enough input to nearly fill a requested output size |
||||
- zlib isn't designed to do this, but fitblk does it anyway |
||||
|
||||
gzappend.c |
||||
append to a gzip file |
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate() |
||||
- illustrates the use of deflatePrime() to start at any bit |
||||
|
||||
gzjoin.c |
||||
join gzip files without recalculating the crc or recompressing |
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate() |
||||
- illustrates the use of crc32_combine() |
||||
|
||||
gzlog.c |
||||
gzlog.h |
||||
efficiently maintain a message log file in gzip format |
||||
- illustrates use of raw deflate and Z_SYNC_FLUSH |
||||
- illustrates use of gzip header extra field |
||||
|
||||
zlib_how.html |
||||
painfully comprehensive description of zpipe.c (see below) |
||||
- describes in excruciating detail the use of deflate() and inflate() |
||||
|
||||
zpipe.c |
||||
reads and writes zlib streams from stdin to stdout |
||||
- illustrates the proper use of deflate() and inflate() |
@ -0,0 +1,235 @@ |
||||
/* fitblk.c: example of fitting compressed output to a specified size
|
||||
Not copyrighted -- provided to the public domain |
||||
Version 1.1 25 November 2004 Mark Adler */ |
||||
|
||||
/* Version history:
|
||||
1.0 24 Nov 2004 First version |
||||
1.1 25 Nov 2004 Change deflateInit2() to deflateInit() |
||||
Use fixed-size, stack-allocated raw buffers |
||||
Simplify code moving compression to subroutines |
||||
Use assert() for internal errors |
||||
Add detailed description of approach |
||||
*/ |
||||
|
||||
/* Approach to just fitting a requested compressed size:
|
||||
|
||||
fitblk performs three compression passes on a portion of the input |
||||
data in order to determine how much of that input will compress to |
||||
nearly the requested output block size. The first pass generates |
||||
enough deflate blocks to produce output to fill the requested |
||||
output size plus a specified excess amount (see the EXCESS define |
||||
below). The last deflate block may go quite a bit past that, but |
||||
is discarded. The second pass decompresses and recompresses just |
||||
the compressed data that fit in the requested plus excess sized |
||||
buffer. The deflate process is terminated after that amount of |
||||
input, which is less than the amount consumed on the first pass. |
||||
The last deflate block of the result will be of a comparable size |
||||
to the final product, so that the header for that deflate block and |
||||
the compression ratio for that block will be about the same as in |
||||
the final product. The third compression pass decompresses the |
||||
result of the second step, but only the compressed data up to the |
||||
requested size minus an amount to allow the compressed stream to |
||||
complete (see the MARGIN define below). That will result in a |
||||
final compressed stream whose length is less than or equal to the |
||||
requested size. Assuming sufficient input and a requested size |
||||
greater than a few hundred bytes, the shortfall will typically be |
||||
less than ten bytes. |
||||
|
||||
If the input is short enough that the first compression completes |
||||
before filling the requested output size, then that compressed |
||||
stream is returned with no recompression. |
||||
|
||||
EXCESS is chosen to be just greater than the shortfall seen in a |
||||
two pass approach similar to the above. That shortfall is due to |
||||
the last deflate block compressing more efficiently with a smaller |
||||
header on the second pass. EXCESS is set to be large enough so |
||||
that there is enough uncompressed data for the second pass to fill |
||||
out the requested size, and small enough so that the final deflate |
||||
block of the second pass will be close in size to the final deflate |
||||
block of the third and final pass. MARGIN is chosen to be just |
||||
large enough to assure that the final compression has enough room |
||||
to complete in all cases. |
||||
*/ |
||||
|
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <assert.h> |
||||
#include "zlib.h" |
||||
|
||||
#define local static |
||||
|
||||
/* print nastygram and leave */ |
||||
local void quit(char *why) |
||||
{ |
||||
fprintf(stderr, "fitblk abort: %s\n", why); |
||||
exit(1); |
||||
} |
||||
|
||||
#define RAWLEN 4096 /* intermediate uncompressed buffer size */ |
||||
|
||||
/* compress from file to def until provided buffer is full or end of
|
||||
input reached; return last deflate() return value, or Z_ERRNO if |
||||
there was read error on the file */ |
||||
local int partcompress(FILE *in, z_streamp def) |
||||
{ |
||||
int ret, flush; |
||||
char raw[RAWLEN]; |
||||
|
||||
flush = Z_NO_FLUSH; |
||||
do { |
||||
def->avail_in = fread(raw, 1, RAWLEN, in); |
||||
if (ferror(in)) |
||||
return Z_ERRNO; |
||||
def->next_in = raw; |
||||
if (feof(in)) |
||||
flush = Z_FINISH; |
||||
ret = deflate(def, flush); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
} while (def->avail_out != 0 && flush == Z_NO_FLUSH); |
||||
return ret; |
||||
} |
||||
|
||||
/* recompress from inf's input to def's output; the input for inf and
|
||||
the output for def are set in those structures before calling; |
||||
return last deflate() return value, or Z_MEM_ERROR if inflate() |
||||
was not able to allocate enough memory when it needed to */ |
||||
local int recompress(z_streamp inf, z_streamp def) |
||||
{ |
||||
int ret, flush; |
||||
char raw[RAWLEN]; |
||||
|
||||
flush = Z_NO_FLUSH; |
||||
do { |
||||
/* decompress */ |
||||
inf->avail_out = RAWLEN; |
||||
inf->next_out = raw; |
||||
ret = inflate(inf, Z_NO_FLUSH); |
||||
assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && |
||||
ret != Z_NEED_DICT); |
||||
if (ret == Z_MEM_ERROR) |
||||
return ret; |
||||
|
||||
/* compress what was decompresed until done or no room */ |
||||
def->avail_in = RAWLEN - inf->avail_out; |
||||
def->next_in = raw; |
||||
if (inf->avail_out != 0) |
||||
flush = Z_FINISH; |
||||
ret = deflate(def, flush); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
} while (ret != Z_STREAM_END && def->avail_out != 0); |
||||
return ret; |
||||
} |
||||
|
||||
#define EXCESS 256 /* empirically determined stream overage */ |
||||
#define MARGIN 8 /* amount to back off for completion */ |
||||
|
||||
/* compress from stdin to fixed-size block on stdout */ |
||||
int main(int argc, char **argv) |
||||
{ |
||||
int ret; /* return code */ |
||||
unsigned size; /* requested fixed output block size */ |
||||
unsigned have; /* bytes written by deflate() call */ |
||||
char *blk; /* intermediate and final stream */ |
||||
char *tmp; /* close to desired size stream */ |
||||
z_stream def, inf; /* zlib deflate and inflate states */ |
||||
|
||||
/* get requested output size */ |
||||
if (argc != 2) |
||||
quit("need one argument: size of output block"); |
||||
ret = strtol(argv[1], argv + 1, 10); |
||||
if (argv[1][0] != 0) |
||||
quit("argument must be a number"); |
||||
if (ret < 8) /* 8 is minimum zlib stream size */ |
||||
quit("need positive size of 8 or greater"); |
||||
size = (unsigned)ret; |
||||
|
||||
/* allocate memory for buffers and compression engine */ |
||||
blk = malloc(size + EXCESS); |
||||
def.zalloc = Z_NULL; |
||||
def.zfree = Z_NULL; |
||||
def.opaque = Z_NULL; |
||||
ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); |
||||
if (ret != Z_OK || blk == NULL) |
||||
quit("out of memory"); |
||||
|
||||
/* compress from stdin until output full, or no more input */ |
||||
def.avail_out = size + EXCESS; |
||||
def.next_out = blk; |
||||
ret = partcompress(stdin, &def); |
||||
if (ret == Z_ERRNO) |
||||
quit("error reading input"); |
||||
|
||||
/* if it all fit, then size was undersubscribed -- done! */ |
||||
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { |
||||
/* write block to stdout */ |
||||
have = size + EXCESS - def.avail_out; |
||||
ret = fwrite(blk, 1, have, stdout); |
||||
if (ret != have || ferror(stdout)) |
||||
quit("error writing output"); |
||||
|
||||
/* clean up and print results to stderr */ |
||||
ret = deflateEnd(&def); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
free(blk); |
||||
fprintf(stderr, |
||||
"%u bytes unused out of %u requested (all input)\n", |
||||
size - have, size); |
||||
return 0; |
||||
} |
||||
|
||||
/* it didn't all fit -- set up for recompression */ |
||||
inf.zalloc = Z_NULL; |
||||
inf.zfree = Z_NULL; |
||||
inf.opaque = Z_NULL; |
||||
inf.avail_in = 0; |
||||
inf.next_in = Z_NULL; |
||||
ret = inflateInit(&inf); |
||||
tmp = malloc(size + EXCESS); |
||||
if (ret != Z_OK || tmp == NULL) |
||||
quit("out of memory"); |
||||
ret = deflateReset(&def); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
|
||||
/* do first recompression close to the right amount */ |
||||
inf.avail_in = size + EXCESS; |
||||
inf.next_in = blk; |
||||
def.avail_out = size + EXCESS; |
||||
def.next_out = tmp; |
||||
ret = recompress(&inf, &def); |
||||
if (ret == Z_MEM_ERROR) |
||||
quit("out of memory"); |
||||
|
||||
/* set up for next reocmpression */ |
||||
ret = inflateReset(&inf); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
ret = deflateReset(&def); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
|
||||
/* do second and final recompression (third compression) */ |
||||
inf.avail_in = size - MARGIN; /* assure stream will complete */ |
||||
inf.next_in = tmp; |
||||
def.avail_out = size; |
||||
def.next_out = blk; |
||||
ret = recompress(&inf, &def); |
||||
if (ret == Z_MEM_ERROR) |
||||
quit("out of memory"); |
||||
assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ |
||||
|
||||
/* done -- write block to stdout */ |
||||
have = size - def.avail_out; |
||||
ret = fwrite(blk, 1, have, stdout); |
||||
if (ret != have || ferror(stdout)) |
||||
quit("error writing output"); |
||||
|
||||
/* clean up and print results to stderr */ |
||||
free(tmp); |
||||
ret = inflateEnd(&inf); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
ret = deflateEnd(&def); |
||||
assert(ret != Z_STREAM_ERROR); |
||||
free(blk); |
||||
fprintf(stderr, |
||||
"%u bytes unused out of %u requested (%lu input)\n", |
||||
size - have, size, def.total_in); |
||||
return 0; |
||||
} |
@ -0,0 +1,447 @@ |
||||
/* gzjoin -- command to join gzip files into one gzip file
|
||||
|
||||
Copyright (C) 2004 Mark Adler, all rights reserved |
||||
version 1.0, 11 Dec 2004 |
||||
|
||||
This software is provided 'as-is', without any express or implied |
||||
warranty. In no event will the author be held liable for any damages |
||||
arising from the use of this software. |
||||
|
||||
Permission is granted to anyone to use this software for any purpose, |
||||
including commercial applications, and to alter it and redistribute it |
||||
freely, subject to the following restrictions: |
||||
|
||||
1. The origin of this software must not be misrepresented; you must not |
||||
claim that you wrote the original software. If you use this software |
||||
in a product, an acknowledgment in the product documentation would be |
||||
appreciated but is not required. |
||||
2. Altered source versions must be plainly marked as such, and must not be |
||||
misrepresented as being the original software. |
||||
3. This notice may not be removed or altered from any source distribution. |
||||
|
||||
Mark Adler madler@alumni.caltech.edu |
||||
*/ |
||||
|
||||
/*
|
||||
* Change history: |
||||
* |
||||
* 1.0 11 Dec 2004 - First version |
||||
*/ |
||||
|
||||
/*
|
||||
gzjoin takes one or more gzip files on the command line and writes out a |
||||
single gzip file that will uncompress to the concatenation of the |
||||
uncompressed data from the individual gzip files. gzjoin does this without |
||||
having to recompress any of the data and without having to calculate a new |
||||
crc32 for the concatenated uncompressed data. gzjoin does however have to |
||||
decompress all of the input data in order to find the bits in the compressed |
||||
data that need to be modified to concatenate the streams. |
||||
|
||||
gzjoin does not do an integrity check on the input gzip files other than |
||||
checking the gzip header and decompressing the compressed data. They are |
||||
otherwise assumed to be complete and correct. |
||||
|
||||
Each joint between gzip files removes at least 18 bytes of previous trailer |
||||
and subsequent header, and inserts an average of about three bytes to the |
||||
compressed data in order to connect the streams. The output gzip file |
||||
has a minimal ten-byte gzip header with no file name or modification time. |
||||
|
||||
This program was written to illustrate the use of the Z_BLOCK option of |
||||
inflate() and the crc32_combine() function. gzjoin will not compile with |
||||
versions of zlib earlier than 1.2.3. |
||||
*/ |
||||
|
||||
#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ |
||||
#include <stdlib.h> /* exit(), malloc(), free() */ |
||||
#include <fcntl.h> /* open() */ |
||||
#include <unistd.h> /* close(), read(), lseek() */ |
||||
#include "zlib.h" |
||||
/* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ |
||||
|
||||
#define local static |
||||
|
||||
/* exit with an error (return a value to allow use in an expression) */ |
||||
local int bail(char *why1, char *why2) |
||||
{ |
||||
fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); |
||||
exit(1); |
||||
return 0; |
||||
} |
||||
|
||||
/* -- simple buffered file input with access to the buffer -- */ |
||||
|
||||
#define CHUNK 32768 /* must be a power of two and fit in unsigned */ |
||||
|
||||
/* bin: state of one buffered input file; the buffer holds up to CHUNK bytes
   and the unread portion is the left bytes starting at next */
typedef struct {
    char *name;             /* file name, kept for error messages */
    int fd;                 /* descriptor the data is read from */
    unsigned left;          /* number of unread bytes available at next */
    unsigned char *next;    /* position of the next unread byte */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
||||
|
||||
/* close a buffered file and free allocated memory */ |
||||
local void bclose(bin *in) |
||||
{ |
||||
if (in != NULL) { |
||||
if (in->fd != -1) |
||||
close(in->fd); |
||||
if (in->buf != NULL) |
||||
free(in->buf); |
||||
free(in); |
||||
} |
||||
} |
||||
|
||||
/* open a buffered file for input, return a pointer to type bin, or NULL on
|
||||
failure */ |
||||
local bin *bopen(char *name) |
||||
{ |
||||
bin *in; |
||||
|
||||
in = malloc(sizeof(bin)); |
||||
if (in == NULL) |
||||
return NULL; |
||||
in->buf = malloc(CHUNK); |
||||
in->fd = open(name, O_RDONLY, 0); |
||||
if (in->buf == NULL || in->fd == -1) { |
||||
bclose(in); |
||||
return NULL; |
||||
} |
||||
in->left = 0; |
||||
in->next = in->buf; |
||||
in->name = name; |
||||
return in; |
||||
} |
||||
|
||||
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
|
||||
1 indicating that end-of-file was reached */ |
||||
local int bload(bin *in) |
||||
{ |
||||
ssize_t len; |
||||
|
||||
if (in == NULL) |
||||
return -1; |
||||
if (in->left != 0) |
||||
return 0; |
||||
in->next = in->buf; |
||||
do { |
||||
len = read(in->fd, in->buf + in->left, CHUNK - in->left); |
||||
if (len < 0) |
||||
return -1; |
||||
in->left += (unsigned)len; |
||||
} while (len != 0 && in->left < CHUNK); |
||||
return len == 0 ? 1 : 0; |
||||
} |
||||
|
||||
/* get a byte from the file, bail if end of file -- the buffer is reloaded
   first if it is empty; the argument is parenthesized in the expansion but
   is evaluated more than once, so it must not have side effects */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
||||
|
||||
/* get a four-byte little-endian unsigned integer from file */ |
||||
local unsigned long bget4(bin *in) |
||||
{ |
||||
unsigned long val; |
||||
|
||||
val = bget(in); |
||||
val += (unsigned long)(bget(in)) << 8; |
||||
val += (unsigned long)(bget(in)) << 16; |
||||
val += (unsigned long)(bget(in)) << 24; |
||||
return val; |
||||
} |
||||
|
||||
/* skip bytes in file */ |
||||
local void bskip(bin *in, unsigned skip) |
||||
{ |
||||
/* check pointer */ |
||||
if (in == NULL) |
||||
return; |
||||
|
||||
/* easy case -- skip bytes in buffer */ |
||||
if (skip <= in->left) { |
||||
in->left -= skip; |
||||
in->next += skip; |
||||
return; |
||||
} |
||||
|
||||
/* skip what's in buffer, discard buffer contents */ |
||||
skip -= in->left; |
||||
in->left = 0; |
||||
|
||||
/* seek past multiples of CHUNK bytes */ |
||||
if (skip > CHUNK) { |
||||
unsigned left; |
||||
|
||||
left = skip & (CHUNK - 1); |
||||
if (left == 0) { |
||||
/* exact number of chunks: seek all the way minus one byte to check
|
||||
for end-of-file with a read */ |
||||
lseek(in->fd, skip - 1, SEEK_CUR); |
||||
if (read(in->fd, in->buf, 1) != 1) |
||||
bail("unexpected end of file on ", in->name); |
||||
return; |
||||
} |
||||
|
||||
/* skip the integral chunks, update skip with remainder */ |
||||
lseek(in->fd, skip - left, SEEK_CUR); |
||||
skip = left; |
||||
} |
||||
|
||||
/* read more input and skip remainder */ |
||||
bload(in); |
||||
if (skip > in->left) |
||||
bail("unexpected end of file on ", in->name); |
||||
in->left -= skip; |
||||
in->next += skip; |
||||
} |
||||
|
||||
/* -- end of buffered input functions -- */ |
||||
|
||||
/* skip the gzip header from file in */ |
||||
local void gzhead(bin *in) |
||||
{ |
||||
int flags; |
||||
|
||||
/* verify gzip magic header and compression method */ |
||||
if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) |
||||
bail(in->name, " is not a valid gzip file"); |
||||
|
||||
/* get and verify flags */ |
||||
flags = bget(in); |
||||
if ((flags & 0xe0) != 0) |
||||
bail("unknown reserved bits set in ", in->name); |
||||
|
||||
/* skip modification time, extra flags, and os */ |
||||
bskip(in, 6); |
||||
|
||||
/* skip extra field if present */ |
||||
if (flags & 4) { |
||||
unsigned len; |
||||
|
||||
len = bget(in); |
||||
len += (unsigned)(bget(in)) << 8; |
||||
bskip(in, len); |
||||
} |
||||
|
||||
/* skip file name if present */ |
||||
if (flags & 8) |
||||
while (bget(in) != 0) |
||||
; |
||||
|
||||
/* skip comment if present */ |
||||
if (flags & 16) |
||||
while (bget(in) != 0) |
||||
; |
||||
|
||||
/* skip header crc if present */ |
||||
if (flags & 2) |
||||
bskip(in, 2); |
||||
} |
||||
|
||||
/* write the low 32 bits of val to out as four bytes, least significant
   byte first */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
||||
|
||||
/* Load up zlib stream from buffered input, bail if end of file */ |
||||
local void zpull(z_streamp strm, bin *in) |
||||
{ |
||||
if (in->left == 0) |
||||
bload(in); |
||||
if (in->left == 0) |
||||
bail("unexpected end of file on ", in->name); |
||||
strm->avail_in = in->left; |
||||
strm->next_in = in->next; |
||||
} |
||||
|
||||
/* Write header for gzip file to out and initialize trailer. */ |
||||
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) |
||||
{ |
||||
fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); |
||||
*crc = crc32(0L, Z_NULL, 0); |
||||
*tot = 0; |
||||
} |
||||
|
||||
/* Copy the compressed data from name, zeroing the last block bit of the last
   block if clr is true, and adding empty blocks as needed to get to a byte
   boundary.  If clr is false, then the last block becomes the last block of
   the output, and the gzip trailer is written.  crc and tot maintain the
   crc and length (modulo 2^32) of the output for the trailer.  The resulting
   gzip file is written to out.  gzinit() must be called before the first call
   of gzcopy() to write the gzip header and to initialize crc and tot.

   The input is inflated only to locate deflate block boundaries and to count
   the uncompressed length; the uncompressed data itself is discarded, and the
   compressed bytes are copied to out from the input buffer (after any
   last-block bit in them has been cleared).  Any failure ends the program
   through bail(). */
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
                  FILE *out)
{
    int ret;                /* return value from zlib functions */
    int pos;                /* where the "last block" bit is in byte */
    int last;               /* true if processing the last block */
    bin *in;                /* buffered input file */
    unsigned char *start;   /* start of compressed data in buffer */
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
    z_stream strm;          /* zlib inflate stream */

    /* open gzip file and skip header */
    in = bopen(name);
    if (in == NULL)
        bail("could not open ", name);
    gzhead(in);

    /* allocate buffer for uncompressed data and initialize raw inflate
       stream */
    junk = malloc(CHUNK);
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit2(&strm, -15);
    if (junk == NULL || ret != Z_OK)
        bail("out of memory", "");

    /* inflate and copy compressed data, clear last-block bit if requested */
    len = 0;
    zpull(&strm, in);
    start = strm.next_in;

    /* the first block's last-block bit is the low bit of the first byte */
    last = start[0] & 1;
    if (last && clr)
        start[0] &= ~1;

    /* avail_out of zero keeps the refill test below from firing before the
       first inflate() call */
    strm.avail_out = 0;
    for (;;) {
        /* if input used and output done, write used input and get more */
        if (strm.avail_in == 0 && strm.avail_out != 0) {
            fwrite(start, 1, strm.next_in - start, out);
            start = in->buf;
            in->left = 0;       /* mark buffer consumed so zpull() reloads */
            zpull(&strm, in);
        }

        /* decompress -- return early when end-of-block reached */
        strm.avail_out = CHUNK;
        strm.next_out = junk;
        ret = inflate(&strm, Z_BLOCK);
        switch (ret) {
        case Z_MEM_ERROR:
            bail("out of memory", "");      /* does not return */
        case Z_DATA_ERROR:
            bail("invalid compressed data in ", in->name);
        }

        /* update length of uncompressed data */
        len += CHUNK - strm.avail_out;

        /* check for block boundary (only get this when block copied out) */
        if (strm.data_type & 128) {
            /* if that was the last block, then done */
            if (last)
                break;

            /* number of unused bits in last byte */
            pos = strm.data_type & 7;

            /* find the next last-block bit */
            if (pos != 0) {
                /* next last-block bit is in last used byte */
                pos = 0x100 >> pos;         /* mask of that bit */
                last = strm.next_in[-1] & pos;
                if (last && clr)
                    strm.next_in[-1] &= ~pos;
            }
            else {
                /* next last-block bit is in next unused byte */
                if (strm.avail_in == 0) {
                    /* don't have that byte yet -- get it */
                    fwrite(start, 1, strm.next_in - start, out);
                    start = in->buf;
                    in->left = 0;
                    zpull(&strm, in);
                }
                last = strm.next_in[0] & 1;
                if (last && clr)
                    strm.next_in[0] &= ~1;
            }
        }
    }

    /* update buffer with unused input */
    in->left = strm.avail_in;
    in->next = strm.next_in;

    /* copy used input, write empty blocks to get to byte boundary -- all of
       the used input except its final byte is written as is; the final byte
       is held in last because its unused bits may need to be replaced */
    pos = strm.data_type & 7;
    fwrite(start, 1, in->next - start - 1, out);
    last = in->next[-1];
    if (pos == 0 || !clr)
        /* already at byte boundary, or last file: write last byte */
        putc(last, out);
    else {
        /* append empty blocks to last byte */
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
        if (pos & 1) {
            /* odd -- append an empty stored block */
            putc(last, out);
            if (pos == 1)
                putc(0, out);               /* two more bits in block header */
            fwrite("\0\0\xff\xff", 1, 4, out);
        }
        else {
            /* even -- append 1, 2, or 3 empty fixed blocks; each case
               deliberately falls through to the next one */
            switch (pos) {
            case 6:
                putc(last | 8, out);
                last = 0;
                /* fall through */
            case 4:
                putc(last | 0x20, out);
                last = 0;
                /* fall through */
            case 2:
                putc(last | 0x80, out);
                putc(0, out);
            }
        }
    }

    /* update crc and tot -- the four bytes that follow the compressed data
       in the input are read and combined into the running crc */
    *crc = crc32_combine(*crc, bget4(in), len);
    *tot += (unsigned long)len;

    /* clean up */
    inflateEnd(&strm);
    free(junk);
    bclose(in);

    /* write trailer if this is the last gzip file */
    if (!clr) {
        put4(*crc, out);
        put4(*tot, out);
    }
}
||||
|
||||
/* join the gzip files named on the command line into a single gzip stream
   written to stdout; with no arguments, print a usage line on stderr */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */
    int count;                  /* number of gzip files named */
    int i;

    /* the files are everything after the command name */
    count = argc - 1;
    argv++;

    /* show usage if no arguments */
    if (count == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* write the header, then copy each file -- the second argument of
       gzcopy() is the number of files still to come, so that it is zero
       (false) only for the final file */
    gzinit(&crc, &tot, stdout);
    for (i = 0; i < count; i++)
        gzcopy(argv[i], count - 1 - i, &crc, &tot, stdout);

    /* done */
    return 0;
}
@ -0,0 +1,413 @@ |
||||
/*
|
||||
* gzlog.c |
||||
* Copyright (C) 2004 Mark Adler |
||||
* For conditions of distribution and use, see copyright notice in gzlog.h |
||||
* version 1.0, 26 Nov 2004 |
||||
* |
||||
*/ |
||||
|
||||
#include <string.h> /* memcmp() */ |
||||
#include <stdlib.h> /* malloc(), free(), NULL */ |
||||
#include <sys/types.h> /* size_t, off_t */ |
||||
#include <unistd.h> /* read(), close(), sleep(), ftruncate(), */ |
||||
/* lseek() */ |
||||
#include <fcntl.h> /* open() */ |
||||
#include <sys/file.h> /* flock() */ |
||||
#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */ |
||||
|
||||
#include "gzlog.h" /* interface */ |
||||
#define local static |
||||
|
||||
/* state kept for one open log file; the void pointer handed out by
   gzlog_open() points to one of these */
typedef struct {
    int id;                 /* set to GZLOGID to mark a valid object */
    int fd;                 /* descriptor of the open log file */
    off_t extra;            /* file offset of the "ap" subfield contents */
    off_t mark_off;         /* file offset of marked data */
    off_t last_off;         /* file offset of the last block */
    unsigned long crc;      /* crc of the uncompressed data */
    unsigned long len;      /* uncompressed length (modulo 2^32) */
    unsigned stored;        /* bytes in the stored block being written */
} gz_log;
||||
|
||||
#define GZLOGID 19334 /* gz_log object identifier */ |
||||
|
||||
#define LOCK_RETRY 1 /* retry lock once a second */ |
||||
#define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */ |
||||
|
||||
/* acquire a lock on a file */ |
||||
local int lock(int fd) |
||||
{ |
||||
int patience; |
||||
|
||||
/* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ |
||||
patience = LOCK_PATIENCE; |
||||
do { |
||||
if (flock(fd, LOCK_EX + LOCK_NB) == 0) |
||||
return 0; |
||||
(void)sleep(LOCK_RETRY); |
||||
patience -= LOCK_RETRY; |
||||
} while (patience > 0); |
||||
|
||||
/* we've run out of patience -- give up */ |
||||
return -1; |
||||
} |
||||
|
||||
/* release lock */ |
||||
local void unlock(int fd) |
||||
{ |
||||
(void)flock(fd, LOCK_UN); |
||||
} |
||||
|
||||
/* release a log object */ |
||||
local void log_clean(gz_log *log) |
||||
{ |
||||
unlock(log->fd); |
||||
(void)close(log->fd); |
||||
free(log); |
||||
} |
||||
|
||||
/* assemble the four bytes at buf, least significant byte first, into an
   unsigned long */
local unsigned long make_ulg(unsigned char *buf)
{
    unsigned long val = 0;
    int i;

    /* work down from the most significant byte */
    for (i = 3; i >= 0; i--)
        val = (val << 8) | buf[i];
    return val;
}
||||
|
||||
/* Read an off_t from the eight bytes at buf, least significant byte first.

   Only as many bytes as fit in an off_t are used: shifting by the width of
   the type or more is undefined behavior, so where off_t is narrower than
   64 bits the upper bytes of the stored value are ignored rather than
   shifted.  With a 64-bit off_t all eight bytes are used.  A stored value
   with its top bit set is not a valid (non-negative) file offset and is not
   handled specially here. */
local off_t make_off(unsigned char *buf)
{
    int n;
    int bits;               /* width of off_t in bits */
    off_t val;

    bits = (int)(sizeof(off_t) * 8);
    val = (off_t)(*buf++);
    for (n = 8; n < 64 && n < bits; n += 8)
        val += (off_t)(*buf++) << n;
    return val;
}
||||
|
||||
/* store the low 32 bits of val in the four bytes at buf, least significant
   byte first */
local void dice_ulg(unsigned long val, unsigned char *buf)
{
    unsigned char *end = buf + 4;

    while (buf != end) {
        *buf++ = (unsigned char)(val & 0xff);
        val >>= 8;
    }
}
||||
|
||||
/* store val in the eight bytes at buf, least significant byte first */
local void dice_off(off_t val, unsigned char *buf)
{
    unsigned char *end = buf + 8;

    while (buf != end) {
        *buf++ = (unsigned char)(val & 0xff);
        val >>= 8;
    }
}
||||
|
||||
/* initial, empty gzip file for appending -- the bytes are declared unsigned
   because several of them (0x8b, 0xff) do not fit in a signed char; the two
   offsets in the "ap" subfield are both 32, which is the position of the
   empty stored block within this array */
local unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
||||
|
||||
/* initialize a log object with locking */ |
||||
void *gzlog_open(char *path) |
||||
{ |
||||
unsigned xlen; |
||||
unsigned char temp[20]; |
||||
unsigned sub_len; |
||||
int good; |
||||
gz_log *log; |
||||
|
||||
/* allocate log structure */ |
||||
log = malloc(sizeof(gz_log)); |
||||
if (log == NULL) |
||||
return NULL; |
||||
log->id = GZLOGID; |
||||
|
||||
/* open file, creating it if necessary, and locking it */ |
||||
log->fd = open(path, O_RDWR | O_CREAT, 0600); |
||||
if (log->fd < 0) { |
||||
free(log); |
||||
return NULL; |
||||
} |
||||
if (lock(log->fd)) { |
||||
close(log->fd); |
||||
free(log); |
||||
return NULL; |
||||
} |
||||
|
||||
/* if file is empty, write new gzip stream */ |
||||
if (lseek(log->fd, 0, SEEK_END) == 0) { |
||||
if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { |
||||
log_clean(log); |
||||
return NULL; |
||||
} |
||||
} |
||||
|
||||
/* check gzip header */ |
||||
(void)lseek(log->fd, 0, SEEK_SET); |
||||
if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || |
||||
temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { |
||||
log_clean(log); |
||||
return NULL; |
||||
} |
||||
|
||||
/* process extra field to find "ap" sub-field */ |
||||
xlen = temp[10] + (temp[11] << 8); |
||||
good = 0; |
||||
while (xlen) { |
||||
if (xlen < 4 || read(log->fd, temp, 4) != 4) |
||||
break; |
||||
sub_len = temp[2]; |
||||
sub_len += temp[3] << 8; |
||||
xlen -= 4; |
||||
if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { |
||||
good = 1; |
||||
break; |
||||
} |
||||
if (xlen < sub_len) |
||||
break; |
||||
(void)lseek(log->fd, sub_len, SEEK_CUR); |
||||
xlen -= sub_len; |
||||
} |
||||
if (!good) { |
||||
log_clean(log); |
||||
return NULL; |
||||
} |
||||
|
||||
/* read in "ap" sub-field */ |
||||
log->extra = lseek(log->fd, 0, SEEK_CUR); |
||||
if (read(log->fd, temp, 16) != 16) { |
||||
log_clean(log); |
||||
return NULL; |
||||
} |
||||
log->mark_off = make_off(temp); |
||||
log->last_off = make_off(temp + 8); |
||||
|
||||
/* get crc, length of gzip file */ |
||||
(void)lseek(log->fd, log->last_off, SEEK_SET); |
||||
if (read(log->fd, temp, 13) != 13 || |
||||
memcmp(temp, "\001\000\000\377\377", 5) != 0) { |
||||
log_clean(log); |
||||
return NULL; |
||||
} |
||||
log->crc = make_ulg(temp + 5); |
||||
log->len = make_ulg(temp + 9); |
||||
|
||||
/* set up to write over empty last block */ |
||||
(void)lseek(log->fd, log->last_off + 5, SEEK_SET); |
||||
log->stored = 0; |
||||
return (void *)log; |
||||
} |
||||
|
||||
/* maximum amount to put in a stored block before starting a new one */ |
||||
#define MAX_BLOCK 16384 |
||||
|
||||
/* write a block to a log object */ |
||||
int gzlog_write(void *obj, char *data, size_t len) |
||||
{ |
||||
size_t some; |
||||
unsigned char temp[5]; |
||||
gz_log *log; |
||||
|
||||
/* check object */ |
||||
log = (gz_log *)obj; |
||||
if (log == NULL || log->id != GZLOGID) |
||||
return 1; |
||||
|
||||
/* write stored blocks until all of the input is written */ |
||||
do { |
||||
some = MAX_BLOCK - log->stored; |
||||
if (some > len) |
||||
some = len; |
||||
if (write(log->fd, data, some) != some) |
||||
return 1; |
||||
log->crc = crc32(log->crc, data, some); |
||||
log->len += some; |
||||
len -= some; |
||||
data += some; |
||||
log->stored += some; |
||||
|
||||
/* if the stored block is full, end it and start another */ |
||||
if (log->stored == MAX_BLOCK) { |
||||
(void)lseek(log->fd, log->last_off, SEEK_SET); |
||||
temp[0] = 0; |
||||
dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), |
||||
temp + 1); |
||||
if (write(log->fd, temp, 5) != 5) |
||||
return 1; |
||||
log->last_off = lseek(log->fd, log->stored, SEEK_CUR); |
||||
(void)lseek(log->fd, 5, SEEK_CUR); |
||||
log->stored = 0; |
||||
} |
||||
} while (len); |
||||
return 0; |
||||
} |
||||
|
||||
/* recompress the remaining stored deflate data in place */ |
||||
local int recomp(gz_log *log) |
||||
{ |
||||
z_stream strm; |
||||
size_t len, max; |
||||
unsigned char *in; |
||||
unsigned char *out; |
||||
unsigned char temp[16]; |
||||
|
||||
/* allocate space and read it all in (it's around 1 MB) */ |
||||
len = log->last_off - log->mark_off; |
||||
max = len + (len >> 12) + (len >> 14) + 11; |
||||
out = malloc(max); |
||||
if (out == NULL) |
||||
return 1; |
||||
in = malloc(len); |
||||
if (in == NULL) { |
||||
free(out); |
||||
return 1; |
||||
} |
||||
(void)lseek(log->fd, log->mark_off, SEEK_SET); |
||||
if (read(log->fd, in, len) != len) { |
||||
free(in); |
||||
free(out); |
||||
return 1; |
||||
} |
||||
|
||||
/* recompress in memory, decoding stored data as we go */ |
||||
/* note: this assumes that unsigned is four bytes or more */ |
||||
/* consider not making that assumption */ |
||||
strm.zalloc = Z_NULL; |
||||
strm.zfree = Z_NULL; |
||||
strm.opaque = Z_NULL; |
||||
if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, |
||||
Z_DEFAULT_STRATEGY) != Z_OK) { |
||||
free(in); |
||||
free(out); |
||||
return 1; |
||||
} |
||||
strm.next_in = in; |
||||
strm.avail_out = max; |
||||
strm.next_out = out; |
||||
while (len >= 5) { |
||||
if (strm.next_in[0] != 0) |
||||
break; |
||||
strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); |
||||
strm.next_in += 5; |
||||
len -= 5; |
||||
if (strm.avail_in != 0) { |
||||
if (len < strm.avail_in) |
||||
break; |
||||
len -= strm.avail_in; |
||||
(void)deflate(&strm, Z_NO_FLUSH); |
||||
if (strm.avail_in != 0 || strm.avail_out == 0) |
||||
break; |
||||
} |
||||
} |
||||
(void)deflate(&strm, Z_SYNC_FLUSH); |
||||
(void)deflateEnd(&strm); |
||||
free(in); |
||||
if (len != 0 || strm.avail_out == 0) { |
||||
free(out); |
||||
return 1; |
||||
} |
||||
|
||||
/* overwrite stored data with compressed data */ |
||||
(void)lseek(log->fd, log->mark_off, SEEK_SET); |
||||
len = max - strm.avail_out; |
||||
if (write(log->fd, out, len) != len) { |
||||
free(out); |
||||
return 1; |
||||
} |
||||
free(out); |
||||
|
||||
/* write last empty block, crc, and length */ |
||||
log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); |
||||
temp[0] = 1; |
||||
dice_ulg(0xffffL << 16, temp + 1); |
||||
dice_ulg(log->crc, temp + 5); |
||||
dice_ulg(log->len, temp + 9); |
||||
if (write(log->fd, temp, 13) != 13) |
||||
return 1; |
||||
|
||||
/* truncate file to discard remaining stored data and old trailer */ |
||||
ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); |
||||
|
||||
/* update extra field to point to new last empty block */ |
||||
(void)lseek(log->fd, log->extra, SEEK_SET); |
||||
dice_off(log->mark_off, temp); |
||||
dice_off(log->last_off, temp + 8); |
||||
if (write(log->fd, temp, 16) != 16) |
||||
return 1; |
||||
return 0; |
||||
} |
||||
|
||||
/* maximum accumulation of stored blocks before compressing */ |
||||
#define MAX_STORED 1048576 |
||||
|
||||
/* close log object */ |
||||
int gzlog_close(void *obj) |
||||
{ |
||||
unsigned char temp[8]; |
||||
gz_log *log; |
||||
|
||||
/* check object */ |
||||
log = (gz_log *)obj; |
||||
if (log == NULL || log->id != GZLOGID) |
||||
return 1; |
||||
|
||||
/* go to start of most recent block being written */ |
||||
(void)lseek(log->fd, log->last_off, SEEK_SET); |
||||
|
||||
/* if some stuff was put there, update block */ |
||||
if (log->stored) { |
||||
temp[0] = 0; |
||||
dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), |
||||
temp + 1); |
||||
if (write(log->fd, temp, 5) != 5) |
||||
return 1; |
||||
log->last_off = lseek(log->fd, log->stored, SEEK_CUR); |
||||
} |
||||
|
||||
/* write last block (empty) */ |
||||
if (write(log->fd, "\001\000\000\377\377", 5) != 5) |
||||
return 1; |
||||
|
||||
/* write updated crc and uncompressed length */ |
||||
dice_ulg(log->crc, temp); |
||||
dice_ulg(log->len, temp + 4); |
||||
if (write(log->fd, temp, 8) != 8) |
||||
return 1; |
||||
|
||||
/* put offset of that last block in gzip extra block */ |
||||
(void)lseek(log->fd, log->extra + 8, SEEK_SET); |
||||
dice_off(log->last_off, temp); |
||||
if (write(log->fd, temp, 8) != 8) |
||||
return 1; |
||||
|
||||
/* if more than 1 MB stored, then time to compress it */ |
||||
if (log->last_off - log->mark_off > MAX_STORED) { |
||||
if (recomp(log)) |
||||
return 1; |
||||
} |
||||
|
||||
/* unlock and close file */ |
||||
log_clean(log); |
||||
return 0; |
||||
} |
@ -0,0 +1,58 @@ |
||||
/* gzlog.h
|
||||
Copyright (C) 2004 Mark Adler, all rights reserved |
||||
version 1.0, 26 Nov 2004 |
||||
|
||||
This software is provided 'as-is', without any express or implied |
||||
warranty. In no event will the author be held liable for any damages |
||||
arising from the use of this software. |
||||
|
||||
Permission is granted to anyone to use this software for any purpose, |
||||
including commercial applications, and to alter it and redistribute it |
||||
freely, subject to the following restrictions: |
||||
|
||||
1. The origin of this software must not be misrepresented; you must not |
||||
claim that you wrote the original software. If you use this software |
||||
in a product, an acknowledgment in the product documentation would be |
||||
appreciated but is not required. |
||||
2. Altered source versions must be plainly marked as such, and must not be |
||||
misrepresented as being the original software. |
||||
3. This notice may not be removed or altered from any source distribution. |
||||
|
||||
Mark Adler madler@alumni.caltech.edu |
||||
*/ |
||||
|
||||
/*
|
||||
The gzlog object allows writing short messages to a gzipped log file, |
||||
opening the log file locked for small bursts, and then closing it. The log |
||||
object works by appending stored data to the gzip file until 1 MB has been |
||||
accumulated. At that time, the stored data is compressed, and replaces the |
||||
uncompressed data in the file. The log file is truncated to its new size at |
||||
that time. After closing, the log file is always a valid gzip file that can |
||||
be decompressed to recover what was written. |
||||
|
||||
A gzip header "extra" field contains two file offsets for appending. The |
||||
first points to just after the last compressed data. The second points to |
||||
the last stored block in the deflate stream, which is empty. All of the |
||||
data between those pointers is uncompressed. |
||||
*/ |
||||
|
||||
/* Open a gzlog object, creating the log file if it does not exist. Return
|
||||
NULL on error. Note that gzlog_open() could take a long time to return if |
||||
there is difficulty in locking the file. */ |
||||
void *gzlog_open(char *path); |
||||
|
||||
/* Write to a gzlog object. Return non-zero on error. This function will
|
||||
simply write data to the file uncompressed. Compression of the data |
||||
will not occur until gzlog_close() is called. It is expected that |
||||
gzlog_write() is used for a short message, and then gzlog_close() is |
||||
called. If a large amount of data is to be written, then the application |
||||
should write no more than 1 MB at a time with gzlog_write() before |
||||
calling gzlog_close() and then gzlog_open() again. */ |
||||
int gzlog_write(void *log, char *data, size_t len); |
||||
|
||||
/* Close a gzlog object. Return non-zero on error. The log file is locked
|
||||
until this function is called. This function will compress stored data |
||||
at the end of the gzip file if at least 1 MB has been accumulated. Note |
||||
that the file will not be a valid gzip file until this function completes. |
||||
*/ |
||||
int gzlog_close(void *log); |
@ -0,0 +1,522 @@ |
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" |
||||
"http://www.w3.org/TR/REC-html40/loose.dtd"> |
||||
<html> |
||||
<head> |
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> |
||||
<title>zlib Usage Example</title> |
||||
<!-- Copyright (c) 2004 Mark Adler. --> |
||||
</head> |
||||
<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000"> |
||||
<h2 align="center"> zlib Usage Example </h2> |
||||
We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used. |
||||
Users wonder when they should provide more input, when they should use more output, |
||||
what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and |
||||
so on. So for those who have read <tt>zlib.h</tt> (a few times), and |
||||
would like further edification, below is an annotated example in C of simple routines to compress and decompress |
||||
from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The |
||||
annotations are interspersed between lines of the code. So please read between the lines. |
||||
We hope this helps explain some of the intricacies of <em>zlib</em>. |
||||
<p> |
||||
Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>: |
||||
<pre><b> |
||||
/* zpipe.c: example of proper use of zlib's inflate() and deflate() |
||||
Not copyrighted -- provided to the public domain |
||||
Version 1.2 9 November 2004 Mark Adler */ |
||||
|
||||
/* Version history: |
||||
1.0 30 Oct 2004 First version |
||||
1.1 8 Nov 2004 Add void casting for unused return values |
||||
Use switch statement for inflate() return values |
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees |
||||
*/ |
||||
</b></pre><!-- --> |
||||
We now include the header files for the required definitions. From |
||||
<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>, |
||||
<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and |
||||
<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use |
||||
<tt>strcmp()</tt> for command line argument processing. |
||||
From <tt>assert.h</tt> we use the <tt>assert()</tt> macro. |
||||
From <tt>zlib.h</tt> |
||||
we use the basic compression functions <tt>deflateInit()</tt>, |
||||
<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression |
||||
functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and |
||||
<tt>inflateEnd()</tt>. |
||||
<pre><b> |
||||
#include <stdio.h> |
||||
#include <string.h> |
||||
#include <assert.h> |
||||
#include "zlib.h" |
||||
</b></pre><!-- --> |
||||
<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data |
||||
from the <em>zlib</em> routines. Larger buffer sizes would be more efficient, |
||||
especially for <tt>inflate()</tt>. If the memory is available, buffer sizes |
||||
on the order of 128K or 256K bytes should be used. |
||||
<pre><b> |
||||
#define CHUNK 16384 |
||||
</b></pre><!-- --> |
||||
The <tt>def()</tt> routine compresses data from an input file to an output file. The output data |
||||
will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em> |
||||
formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as |
||||
a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast |
||||
check value to verify the integrity of the uncompressed data after decoding. |
||||
<pre><b> |
||||
/* Compress from file source to file dest until EOF on source. |
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be |
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression |
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the |
||||
version of the library linked do not match, or Z_ERRNO if there is |
||||
an error reading or writing the files. */ |
||||
int def(FILE *source, FILE *dest, int level) |
||||
{ |
||||
</b></pre> |
||||
Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em> |
||||
return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>, |
||||
which is either no flushing, or flush to completion after the end of the input file is reached. |
||||
<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure |
||||
is used to pass information to and from the <em>zlib</em> routines, and to maintain the |
||||
<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for |
||||
<tt>deflate()</tt>. |
||||
<pre><b> |
||||
int ret, flush; |
||||
unsigned have; |
||||
z_stream strm; |
||||
char in[CHUNK]; |
||||
char out[CHUNK]; |
||||
</b></pre><!-- --> |
||||
The first thing we do is to initialize the <em>zlib</em> state for compression using |
||||
<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>. |
||||
The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt> |
||||
structure must be initialized before calling <tt>deflateInit()</tt>. Here they are |
||||
set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use |
||||
the default memory allocation routines. An application may also choose to provide |
||||
custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the |
||||
order of 256K bytes for the internal state. |
||||
(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.) |
||||
<p> |
||||
<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and |
||||
the compression level, which is an integer in the range of -1 to 9. Lower compression |
||||
levels result in faster execution, but less compression. Higher levels result in |
||||
greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION, |
||||
equal to -1, |
||||
provides a good compromise between compression and speed and is equivalent to level 6. |
||||
Level 0 actually does no compression at all, and in fact expands the data slightly to produce |
||||
the <em>zlib</em> format (it is not a byte-for-byte copy of the input). |
||||
More advanced applications of <em>zlib</em> |
||||
may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how |
||||
much memory will be used, at some price in compression. Or it may need to request a |
||||
<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw |
||||
encoding with no header or trailer at all. |
||||
<p> |
||||
We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant |
||||
<tt>Z_OK</tt> to make sure that it was able to |
||||
allocate memory for the internal state, and that the provided arguments were valid. |
||||
<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt> |
||||
file came from matches the version of <em>zlib</em> actually linked with the program. This |
||||
is especially important for environments in which <em>zlib</em> is a shared library. |
||||
<p> |
||||
Note that an application can initialize multiple, independent <em>zlib</em> streams, which can |
||||
operate in parallel. The state information maintained in the structure allows the <em>zlib</em> |
||||
routines to be reentrant. |
||||
<pre><b> |
||||
/* allocate deflate state */ |
||||
strm.zalloc = Z_NULL; |
||||
strm.zfree = Z_NULL; |
||||
strm.opaque = Z_NULL; |
||||
ret = deflateInit(&strm, level); |
||||
if (ret != Z_OK) |
||||
return ret; |
||||
</b></pre><!-- --> |
||||
With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop |
||||
reads all of the input file and exits at the bottom of the loop once end-of-file is reached. |
||||
This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the |
||||
input data has been processed and that all of the output data has been generated and consumed |
||||
before we fall out of the loop at the bottom. |
||||
<pre><b> |
||||
/* compress until end of file */ |
||||
do { |
||||
</b></pre> |
||||
We start off by reading data from the input file. The number of bytes read is put directly |
||||
into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also |
||||
check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the |
||||
<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to |
||||
indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt> |
||||
to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The |
||||
reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss |
||||
the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish |
||||
up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em> |
||||
constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still |
||||
in the middle of the uncompressed data. |
||||
<p> |
||||
If there is an error in reading from the input file, the process is aborted with |
||||
<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning |
||||
the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called |
||||
at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or |
||||
<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is |
||||
no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail. |
||||
<pre><b> |
||||
strm.avail_in = fread(in, 1, CHUNK, source); |
||||
if (ferror(source)) { |
||||
(void)deflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; |
||||
strm.next_in = in; |
||||
</b></pre><!-- --> |
||||
The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then |
||||
keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more |
||||
new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e., |
||||
<tt>avail_in</tt> will be zero. |
||||
<pre><b> |
||||
/* run deflate() on input until output buffer not full, finish |
||||
compression if all of source has been read in */ |
||||
do { |
||||
</b></pre> |
||||
Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number |
||||
of available output bytes and <tt>next_out</tt> to a pointer to that space. |
||||
<pre><b> |
||||
strm.avail_out = CHUNK; |
||||
strm.next_out = out; |
||||
</b></pre> |
||||
Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the |
||||
<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as |
||||
<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then |
||||
updated past the input data consumed and the output data written. It is the amount of |
||||
output space available that may limit how much input is consumed. |
||||
Hence the inner loop to make sure that |
||||
all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt> |
||||
and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those |
||||
between <tt>deflate()</tt> calls until it's all used up. |
||||
<p> |
||||
The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing |
||||
the input and output information and the internal compression engine state, and a parameter |
||||
indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume |
||||
several K bytes of input data before producing any output (except for the header), in order |
||||
to accumulate statistics on the data for optimum compression. It will then put out a burst of |
||||
compressed data, and proceed to consume more input before the next burst. Eventually, |
||||
<tt>deflate()</tt> |
||||
must be told to terminate the stream, complete the compression with provided input data, and |
||||
write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long |
||||
as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided, |
||||
<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how |
||||
much output space is provided, <tt>deflate()</tt> may have to be called several times until it |
||||
has provided the complete compressed stream, even after it has consumed all of the input. The flush |
||||
parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls. |
||||
<p> |
||||
There are other values of the flush parameter that are used in more advanced applications. You can |
||||
force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided |
||||
so far, even if it wouldn't have otherwise, for example to control data latency on a link with |
||||
compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to |
||||
that point so that what follows can be decompressed independently, for example for random access |
||||
applications. Both requests will degrade compression by an amount depending on how often such |
||||
requests are made. |
||||
<p> |
||||
<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why |
||||
not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through |
||||
<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are |
||||
<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt> |
||||
is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of |
||||
<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt> |
||||
until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not |
||||
initialized properly, but we did initialize it properly. There is no harm in checking for |
||||
<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some |
||||
other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state. |
||||
<tt>Z_BUF_ERROR</tt> will be explained further below, but |
||||
suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume |
||||
more input or produce more output. <tt>deflate()</tt> can be called again with more output space |
||||
or more available input, which it will be in this code. |
||||
<pre><b> |
||||
ret = deflate(&strm, flush); /* no bad return value */ |
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */ |
||||
</b></pre> |
||||
Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the |
||||
difference between how much space was provided before the call, and how much output space |
||||
is still available after the call. Then that data, if any, is written to the output file. |
||||
We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there |
||||
is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak. |
||||
<pre><b> |
||||
have = CHUNK - strm.avail_out; |
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) { |
||||
(void)deflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
</b></pre> |
||||
The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the |
||||
provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with |
||||
the provided input, and that all of that input has been consumed. We can then fall out of this |
||||
loop and reuse the input buffer. |
||||
<p> |
||||
The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill |
||||
the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that |
||||
<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer! |
||||
<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can. |
||||
As far as we know, <tt>deflate()</tt> |
||||
has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output |
||||
at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call |
||||
wasn't able to do anything, either consume input or produce output, and so it returns |
||||
<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at |
||||
all. Now we finally have the desired indication that <tt>deflate()</tt> is really done, |
||||
and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>. |
||||
<p> |
||||
With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will |
||||
complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return |
||||
<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing |
||||
until the state is reinitialized. |
||||
<p> |
||||
Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt> |
||||
instead of the single inner loop we have here. The first loop would call |
||||
without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call |
||||
<tt>deflate()</tt> with no more |
||||
data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this |
||||
example, that can be avoided by simply keeping track of the current flush state. |
||||
<pre><b> |
||||
} while (strm.avail_out == 0); |
||||
assert(strm.avail_in == 0); /* all input will be used */ |
||||
</b></pre><!-- --> |
||||
Now we check to see if we have already processed all of the input file. That information was |
||||
saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so, |
||||
then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt> |
||||
from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was |
||||
consumed and all of the output was generated. |
||||
<pre><b> |
||||
/* done when last data in file processed */ |
||||
} while (flush != Z_FINISH); |
||||
assert(ret == Z_STREAM_END); /* stream will be complete */ |
||||
</b></pre><!-- --> |
||||
The process is complete, but we still need to deallocate the state to avoid a memory leak |
||||
(or rather more like a memory hemorrhage if you didn't do this). Then |
||||
finally we can return with a happy return value. |
||||
<pre><b> |
||||
/* clean up and return */ |
||||
(void)deflateEnd(&strm); |
||||
return Z_OK; |
||||
} |
||||
</b></pre><!-- --> |
||||
Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt> |
||||
decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the |
||||
uncompressed data to the output file. Much of the discussion above for <tt>def()</tt> |
||||
applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between |
||||
the two. |
||||
<pre><b> |
||||
/* Decompress from file source to file dest until stream ends or EOF. |
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be |
||||
allocated for processing, Z_DATA_ERROR if the deflate data is |
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and |
||||
the version of the library linked do not match, or Z_ERRNO if there |
||||
is an error reading or writing the files. */ |
||||
int inf(FILE *source, FILE *dest) |
||||
{ |
||||
</b></pre> |
||||
The local variables have the same functionality as they do for <tt>def()</tt>. The |
||||
only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt> |
||||
can tell from the <em>zlib</em> stream itself when the stream is complete. |
||||
<pre><b> |
||||
int ret; |
||||
unsigned have; |
||||
z_stream strm; |
||||
char in[CHUNK]; |
||||
char out[CHUNK]; |
||||
</b></pre><!-- --> |
||||
The initialization of the state is the same, except that there is no compression level, |
||||
of course, and two more elements of the structure are initialized. <tt>avail_in</tt> |
||||
and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This |
||||
is because the application has the option to provide the start of the zlib stream in |
||||
order for <tt>inflateInit()</tt> to have access to information about the compression |
||||
method to aid in memory allocation. In the current implementation of <em>zlib</em> |
||||
(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of |
||||
<tt>inflate()</tt> anyway. However, those fields must be initialized since later versions |
||||
of <em>zlib</em> that provide more compression methods may take advantage of this interface. |
||||
In any case, no decompression is performed by <tt>inflateInit()</tt>, so the |
||||
<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling. |
||||
<p> |
||||
Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to |
||||
indicate that no input data is being provided. |
||||
<pre><b> |
||||
/* allocate inflate state */ |
||||
strm.zalloc = Z_NULL; |
||||
strm.zfree = Z_NULL; |
||||
strm.opaque = Z_NULL; |
||||
strm.avail_in = 0; |
||||
strm.next_in = Z_NULL; |
||||
ret = inflateInit(&strm); |
||||
if (ret != Z_OK) |
||||
return ret; |
||||
</b></pre><!-- --> |
||||
The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates |
||||
that it has reached the end of the compressed data and has produced all of the uncompressed |
||||
output. This is in contrast to <tt>def()</tt> which processes all of the input file. |
||||
If end-of-file is reached before the compressed data self-terminates, then the compressed |
||||
data is incomplete and an error is returned. |
||||
<pre><b> |
||||
/* decompress until deflate stream ends or end of file */ |
||||
do { |
||||
</b></pre> |
||||
We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the |
||||
end of the input file, then we leave the outer loop and report an error, since the |
||||
compressed data is incomplete. Note that we may read more data than is eventually consumed |
||||
by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream. |
||||
For applications where <em>zlib</em> streams are embedded in other data, this routine would |
||||
need to be modified to return the unused data, or at least indicate how much of the input |
||||
data was not used, so the application would know where to pick up after the <em>zlib</em> stream. |
||||
<pre><b> |
||||
strm.avail_in = fread(in, 1, CHUNK, source); |
||||
if (ferror(source)) { |
||||
(void)inflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
if (strm.avail_in == 0) |
||||
break; |
||||
strm.next_in = in; |
||||
</b></pre><!-- --> |
||||
The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to |
||||
keep calling <tt>inflate()</tt> until it has generated all of the output it can with the |
||||
provided input. |
||||
<pre><b> |
||||
/* run inflate() on input until output buffer not full */ |
||||
do { |
||||
</b></pre> |
||||
Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>. |
||||
<pre><b> |
||||
strm.avail_out = CHUNK; |
||||
strm.next_out = out; |
||||
</b></pre> |
||||
Now we run the decompression engine itself. There is no need to adjust the flush parameter, since |
||||
the <em>zlib</em> format is self-terminating. The main difference here is that there are |
||||
return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt> |
||||
indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format, |
||||
which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was |
||||
corrupted somewhere along the way since it was compressed. The other error to be processed is |
||||
<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt> |
||||
needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>. |
||||
<p> |
||||
Advanced applications may use |
||||
<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the |
||||
first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt> |
||||
requests that that dictionary be provided before it can start to decompress. Without the dictionary, |
||||
correct decompression is not possible. For this routine, we have no idea what the dictionary is, |
||||
so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>. |
||||
<p> |
||||
<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here, |
||||
but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be |
||||
checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be |
||||
checked for later. |
||||
<pre><b> |
||||
ret = inflate(&strm, Z_NO_FLUSH); |
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */ |
||||
switch (ret) { |
||||
case Z_NEED_DICT: |
||||
ret = Z_DATA_ERROR; /* and fall through */ |
||||
case Z_DATA_ERROR: |
||||
case Z_MEM_ERROR: |
||||
(void)inflateEnd(&strm); |
||||
return ret; |
||||
} |
||||
</b></pre> |
||||
The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>. |
||||
<pre><b> |
||||
have = CHUNK - strm.avail_out; |
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) { |
||||
(void)inflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
</b></pre> |
||||
The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated |
||||
by not filling the output buffer, just as for <tt>deflate()</tt>. |
||||
<pre><b> |
||||
} while (strm.avail_out == 0); |
||||
assert(strm.avail_in == 0); /* all input will be used */ |
||||
</b></pre><!-- --> |
||||
The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the |
||||
end of the input <em>zlib</em> stream, has completed the decompression and integrity |
||||
check, and has provided all of the output. This is indicated by the <tt>inflate()</tt> |
||||
return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt> |
||||
equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end |
||||
of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the |
||||
loop continues to read more input. |
||||
<pre><b> |
||||
/* done when inflate() says it's done */ |
||||
} while (ret != Z_STREAM_END); |
||||
</b></pre><!-- --> |
||||
At this point, decompression successfully completed, or we broke out of the loop due to no |
||||
more data being available from the input file. If the last <tt>inflate()</tt> return value |
||||
is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error |
||||
is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt> |
||||
is called first to avoid a memory leak. |
||||
<pre><b> |
||||
/* clean up and return */ |
||||
(void)inflateEnd(&strm); |
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; |
||||
} |
||||
</b></pre><!-- --> |
||||
That ends the routines that directly use <em>zlib</em>. The following routines make this |
||||
a command-line program by running data through the above routines from <tt>stdin</tt> to |
||||
<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>. |
||||
<p> |
||||
<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt> |
||||
and <tt>inf()</tt>, as detailed in their comments above, and print out an error message. |
||||
Note that these are only a subset of the possible return values from <tt>deflate()</tt> |
||||
and <tt>inflate()</tt>. |
||||
<pre><b> |
||||
/* report a zlib or i/o error */ |
||||
void zerr(int ret) |
||||
{ |
||||
fputs("zpipe: ", stderr); |
||||
switch (ret) { |
||||
case Z_ERRNO: |
||||
if (ferror(stdin)) |
||||
fputs("error reading stdin\n", stderr); |
||||
if (ferror(stdout)) |
||||
fputs("error writing stdout\n", stderr); |
||||
break; |
||||
case Z_STREAM_ERROR: |
||||
fputs("invalid compression level\n", stderr); |
||||
break; |
||||
case Z_DATA_ERROR: |
||||
fputs("invalid or incomplete deflate data\n", stderr); |
||||
break; |
||||
case Z_MEM_ERROR: |
||||
fputs("out of memory\n", stderr); |
||||
break; |
||||
case Z_VERSION_ERROR: |
||||
fputs("zlib version mismatch!\n", stderr); |
||||
} |
||||
} |
||||
</b></pre><!-- --> |
||||
Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The |
||||
<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if |
||||
no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other |
||||
arguments are provided, no compression or decompression is performed. Instead a usage |
||||
message is displayed. Examples are <tt>zpipe &lt; foo.txt &gt; foo.txt.z</tt> to compress, and |
||||
<tt>zpipe -d &lt; foo.txt.z &gt; foo.txt</tt> to decompress. |
||||
<pre><b> |
||||
/* compress or decompress from stdin to stdout */ |
||||
int main(int argc, char **argv) |
||||
{ |
||||
int ret; |
||||
|
||||
/* do compression if no arguments */ |
||||
if (argc == 1) { |
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); |
||||
if (ret != Z_OK) |
||||
zerr(ret); |
||||
return ret; |
||||
} |
||||
|
||||
/* do decompression if -d specified */ |
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) { |
||||
ret = inf(stdin, stdout); |
||||
if (ret != Z_OK) |
||||
zerr(ret); |
||||
return ret; |
||||
} |
||||
|
||||
/* otherwise, report usage */ |
||||
else { |
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); |
||||
return 1; |
||||
} |
||||
} |
||||
</b></pre> |
||||
<hr> |
||||
<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i> |
||||
</body> |
||||
</html> |
@ -0,0 +1,191 @@ |
||||
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
|
||||
Not copyrighted -- provided to the public domain |
||||
Version 1.2 9 November 2004 Mark Adler */ |
||||
|
||||
/* Version history:
|
||||
1.0 30 Oct 2004 First version |
||||
1.1 8 Nov 2004 Add void casting for unused return values |
||||
Use switch statement for inflate() return values |
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees |
||||
*/ |
||||
|
||||
#include <stdio.h> |
||||
#include <string.h> |
||||
#include <assert.h> |
||||
#include "zlib.h" |
||||
|
||||
#define CHUNK 16384 |
||||
|
||||
/* Compress from file source to file dest until EOF on source.
|
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be |
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression |
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the |
||||
version of the library linked do not match, or Z_ERRNO if there is |
||||
an error reading or writing the files. */ |
||||
int def(FILE *source, FILE *dest, int level) |
||||
{ |
||||
int ret, flush; |
||||
unsigned have; |
||||
z_stream strm; |
||||
char in[CHUNK]; |
||||
char out[CHUNK]; |
||||
|
||||
/* allocate deflate state */ |
||||
strm.zalloc = Z_NULL; |
||||
strm.zfree = Z_NULL; |
||||
strm.opaque = Z_NULL; |
||||
ret = deflateInit(&strm, level); |
||||
if (ret != Z_OK) |
||||
return ret; |
||||
|
||||
/* compress until end of file */ |
||||
do { |
||||
strm.avail_in = fread(in, 1, CHUNK, source); |
||||
if (ferror(source)) { |
||||
(void)deflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; |
||||
strm.next_in = in; |
||||
|
||||
/* run deflate() on input until output buffer not full, finish
|
||||
compression if all of source has been read in */ |
||||
do { |
||||
strm.avail_out = CHUNK; |
||||
strm.next_out = out; |
||||
ret = deflate(&strm, flush); /* no bad return value */ |
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */ |
||||
have = CHUNK - strm.avail_out; |
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) { |
||||
(void)deflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
} while (strm.avail_out == 0); |
||||
assert(strm.avail_in == 0); /* all input will be used */ |
||||
|
||||
/* done when last data in file processed */ |
||||
} while (flush != Z_FINISH); |
||||
assert(ret == Z_STREAM_END); /* stream will be complete */ |
||||
|
||||
/* clean up and return */ |
||||
(void)deflateEnd(&strm); |
||||
return Z_OK; |
||||
} |
||||
|
||||
/* Decompress from file source to file dest until stream ends or EOF.
|
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be |
||||
allocated for processing, Z_DATA_ERROR if the deflate data is |
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and |
||||
the version of the library linked do not match, or Z_ERRNO if there |
||||
is an error reading or writing the files. */ |
||||
int inf(FILE *source, FILE *dest) |
||||
{ |
||||
int ret; |
||||
unsigned have; |
||||
z_stream strm; |
||||
char in[CHUNK]; |
||||
char out[CHUNK]; |
||||
|
||||
/* allocate inflate state */ |
||||
strm.zalloc = Z_NULL; |
||||
strm.zfree = Z_NULL; |
||||
strm.opaque = Z_NULL; |
||||
strm.avail_in = 0; |
||||
strm.next_in = Z_NULL; |
||||
ret = inflateInit(&strm); |
||||
if (ret != Z_OK) |
||||
return ret; |
||||
|
||||
/* decompress until deflate stream ends or end of file */ |
||||
do { |
||||
strm.avail_in = fread(in, 1, CHUNK, source); |
||||
if (ferror(source)) { |
||||
(void)inflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
if (strm.avail_in == 0) |
||||
break; |
||||
strm.next_in = in; |
||||
|
||||
/* run inflate() on input until output buffer not full */ |
||||
do { |
||||
strm.avail_out = CHUNK; |
||||
strm.next_out = out; |
||||
ret = inflate(&strm, Z_NO_FLUSH); |
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */ |
||||
switch (ret) { |
||||
case Z_NEED_DICT: |
||||
ret = Z_DATA_ERROR; /* and fall through */ |
||||
case Z_DATA_ERROR: |
||||
case Z_MEM_ERROR: |
||||
(void)inflateEnd(&strm); |
||||
return ret; |
||||
} |
||||
have = CHUNK - strm.avail_out; |
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) { |
||||
(void)inflateEnd(&strm); |
||||
return Z_ERRNO; |
||||
} |
||||
} while (strm.avail_out == 0); |
||||
assert(strm.avail_in == 0); /* all input will be used */ |
||||
|
||||
/* done when inflate() says it's done */ |
||||
} while (ret != Z_STREAM_END); |
||||
|
||||
/* clean up and return */ |
||||
(void)inflateEnd(&strm); |
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; |
||||
} |
||||
|
||||
/* report a zlib or i/o error */ |
||||
void zerr(int ret) |
||||
{ |
||||
fputs("zpipe: ", stderr); |
||||
switch (ret) { |
||||
case Z_ERRNO: |
||||
if (ferror(stdin)) |
||||
fputs("error reading stdin\n", stderr); |
||||
if (ferror(stdout)) |
||||
fputs("error writing stdout\n", stderr); |
||||
break; |
||||
case Z_STREAM_ERROR: |
||||
fputs("invalid compression level\n", stderr); |
||||
break; |
||||
case Z_DATA_ERROR: |
||||
fputs("invalid or incomplete deflate data\n", stderr); |
||||
break; |
||||
case Z_MEM_ERROR: |
||||
fputs("out of memory\n", stderr); |
||||
break; |
||||
case Z_VERSION_ERROR: |
||||
fputs("zlib version mismatch!\n", stderr); |
||||
} |
||||
} |
||||
|
||||
/* compress or decompress from stdin to stdout */ |
||||
int main(int argc, char **argv) |
||||
{ |
||||
int ret; |
||||
|
||||
/* do compression if no arguments */ |
||||
if (argc == 1) { |
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); |
||||
if (ret != Z_OK) |
||||
zerr(ret); |
||||
return ret; |
||||
} |
||||
|
||||
/* do decompression if -d specified */ |
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) { |
||||
ret = inf(stdin, stdout); |
||||
if (ret != Z_OK) |
||||
zerr(ret); |
||||
return ret; |
||||
} |
||||
|
||||
/* otherwise, report usage */ |
||||
else { |
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); |
||||
return 1; |
||||
} |
||||
} |
Loading…
Reference in new issue