Merge 8e55222dae
into c715ad71fe
commit
6b73192c74
6 changed files with 459 additions and 0 deletions
@ -0,0 +1,39 @@ |
|||||||
|
32 : 2 |
||||||
|
39 : 1 |
||||||
|
84 : 1 |
||||||
|
97 : 1 |
||||||
|
98 : 1 |
||||||
|
114 : 1 |
||||||
|
115 : 1 |
||||||
|
119 : 1 |
||||||
|
256 : 1 |
||||||
|
0 : 0 |
||||||
|
1 : 0 |
||||||
|
2 : 0 |
||||||
|
3 : 0 |
||||||
|
4 : 0 |
||||||
|
5 : 0 |
||||||
|
6 : 0 |
||||||
|
7 : 0 |
||||||
|
8 : 0 |
||||||
|
9 : 0 |
||||||
|
10 : 0 |
||||||
|
11 : 0 |
||||||
|
12 : 0 |
||||||
|
13 : 0 |
||||||
|
14 : 0 |
||||||
|
15 : 0 |
||||||
|
16 : 0 |
||||||
|
17 : 0 |
||||||
|
18 : 0 |
||||||
|
19 : 0 |
||||||
|
20 : 0 |
||||||
|
21 : 0 |
||||||
|
22 : 0 |
||||||
|
23 : 0 |
||||||
|
24 : 0 |
||||||
|
25 : 0 |
||||||
|
26 : 0 |
||||||
|
27 : 0 |
||||||
|
28 : 0 |
||||||
|
29 : 0 |
@ -0,0 +1,314 @@ |
|||||||
|
/*
|
||||||
|
Makes a dynamic huffman table given the symbol counts. |
||||||
|
Based on zlib/examples/zpipe.c in zlib 1.2.8 |
||||||
|
|
||||||
|
cd to zlib root directory |
||||||
|
./configure |
||||||
|
make |
||||||
|
cd examples |
||||||
|
cc -O -I.. -o makedht makedht.c ../libz.a |
||||||
|
|
||||||
|
deflate_make_dht( (z_stream *)strm, (int *)lhist, (int *)dhist, (int *)valid_bits );
|
||||||
|
Caller provides lhist and dhist int arrays. A dynamic huffman |
||||||
|
table (DHT) formatted in the manner of Deflate Type 2 block is |
||||||
|
returned in strm. Number of valid bits in the last byte is |
||||||
|
returned in valid_bits. |
||||||
|
|
||||||
|
format_cpb(char *cpbtxt, char *zbuf, int have, int valid_bits ) |
||||||
|
Pretty formats the DHT. |
||||||
|
*/ |
||||||
|
|
||||||
|
/*
|
||||||
|
From command line, supply the Literal/Length/Distance symbols and |
||||||
|
their counts in the *lzcount file. makedht then calls zlib to make |
||||||
|
the dynamic huffman table (DHT). Makedht then writes human |
||||||
|
readable DHT to stdout and binary DHT to <fname>. |
||||||
|
|
||||||
|
[abali@hahn examples]$ ./makedht jabber1.lzcount jabber1.dht |
||||||
|
bytes: 19 invalid bits: 4 |
||||||
|
-------------------------------- |
||||||
|
00000000000000000000000000000094 |
||||||
|
203826000000220058c5a6900244f0c3 |
||||||
|
d7770700000000000000000000000000 |
||||||
|
-------------------------------- |
||||||
|
|
||||||
|
Hex dump of the same: |
||||||
|
[abali@hahn examples]$ xxd jabber1.dht |
||||||
|
0000000: 2038 2600 0000 2200 58c5 a690 0244 f0c3 8&...".X....D.. |
||||||
|
0000010: d777 07 .w. |
||||||
|
|
||||||
|
Notes:
|
||||||
|
Invalid bit count is the number of unused **left-most** bits in the |
||||||
|
last byte. Bit endianness is due to the Deflate specification. |
||||||
|
|
||||||
|
When DEBUG is enabled in zlib, Huffman codes assigned to each |
||||||
|
symbol are also printed to stderr. |
||||||
|
|
||||||
|
The -f flag asks zlib to produce a Huffman code for all the Lit/Len |
||||||
|
(0-285) and Dist (0-29) symbols. The -f flag overrides the symbol |
||||||
|
counts of 0 to 1, and therefore forces the code to be generated for |
||||||
|
all the symbols. In the example below, you can see that the |
||||||
|
result is larger when compared to the previous example. |
||||||
|
|
||||||
|
[abali@hahn examples]$ ./makedht -f jabber1.lzcount jabber1.dht |
||||||
|
bytes: 56 invalid bits: 4 |
||||||
|
-------------------------------- |
||||||
|
000000000000000000000000000001bc |
||||||
|
bde300040208da443232b3f7cedeca48 |
||||||
|
56943d92ec952dbbec19d9ab4284ca4e |
||||||
|
43c8deca56b2f7cc2a65454564af9292 |
||||||
|
f0fbff7e8ffbfd0f0000000000000000 |
||||||
|
-------------------------------- |
||||||
|
|
||||||
|
Sample *.lzcount file |
||||||
|
[abali@hahn examples]$ cat jabber1.lzcount |
||||||
|
32 : 2 |
||||||
|
39 : 1 |
||||||
|
84 : 1 |
||||||
|
97 : 1 |
||||||
|
98 : 1 |
||||||
|
114 : 1 |
||||||
|
115 : 1 |
||||||
|
119 : 1 |
||||||
|
256 : 1 |
||||||
|
0 : 0 |
||||||
|
1 : 0 |
||||||
|
29 : 0 |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h> |
||||||
|
#include <string.h> |
||||||
|
#include <assert.h> |
||||||
|
#include "zlib.h" |
||||||
|
|
||||||
|
#define CHUNK 16384 |
||||||
|
|
||||||
|
/*
|
||||||
|
Cpb stands for compression parameter block. format_cpb converts the |
||||||
|
zlib produced DHT in zbuf to ASCII text and writes to cpbtxt. Have |
||||||
|
is number of bytes in zbuf. Valid_bits is the value returned from |
||||||
|
deflateMakeDHT(). */ |
||||||
|
|
||||||
|
void format_cpb(char *cpbtxt, char *zbuf, int have, int valid_bits ) |
||||||
|
{ |
||||||
|
int i, b; |
||||||
|
char *ptr; |
||||||
|
char tmp[CHUNK]; |
||||||
|
char hex[] = { '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f' }; |
||||||
|
int invalid_bits; |
||||||
|
|
||||||
|
/* last byte */ |
||||||
|
invalid_bits = ( valid_bits ) ? 8 - valid_bits : 0 ; |
||||||
|
fprintf(stderr,"bytes: %d invalid bits: %d\n", have, invalid_bits ); |
||||||
|
/* format-clear the cpbparm file 1st line */ |
||||||
|
memset( cpbtxt, '0', 32 ); |
||||||
|
/* format the cpb bit count */ |
||||||
|
sprintf( tmp, "%x", 8 * have - invalid_bits ); |
||||||
|
b = strlen( tmp ); |
||||||
|
/* write the bit count to cpb parm 1st line */ |
||||||
|
strncpy( cpbtxt+(32-b), tmp, b ); |
||||||
|
/* continue from the next line */ |
||||||
|
ptr = cpbtxt+32; |
||||||
|
for(i=0; i<have; i++) { |
||||||
|
unsigned char byte; |
||||||
|
if ( i % 16 == 0 ) *(ptr++) = '\n'; /* write 16 bytes per line */ |
||||||
|
byte = (unsigned char) zbuf[i]; |
||||||
|
*(ptr++) = hex[ (byte>>4)&0xf ]; /* convert hex to ASCII */ |
||||||
|
*(ptr++) = hex[ (byte )&0xf ]; |
||||||
|
} |
||||||
|
/* padding for the last line */ |
||||||
|
for(i=have; i< (16*((have+15)/16)); i++) { |
||||||
|
*(ptr++) = '0'; *(ptr++) = '0'; |
||||||
|
} |
||||||
|
*(ptr++) = '\n'; *(ptr++) = 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int makedht(char *fname, int *lhist, int *dhist) |
||||||
|
{ |
||||||
|
int ret, flush; |
||||||
|
unsigned have; |
||||||
|
z_stream strm; |
||||||
|
char in[CHUNK]; |
||||||
|
char zbuf[CHUNK]; |
||||||
|
char cpbtxt[CHUNK]; |
||||||
|
FILE *cpbbin; |
||||||
|
int i; |
||||||
|
int valid_bits;
|
||||||
|
int b; |
||||||
|
|
||||||
|
/* file for the CPB binary output */ |
||||||
|
if( NULL == ( cpbbin = fopen( fname, "w" )) ) { |
||||||
|
fprintf( stderr, "error: cannot open %s\n", fname ); |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
/* allocate deflate state */ |
||||||
|
strm.zalloc = Z_NULL; |
||||||
|
strm.zfree = Z_NULL; |
||||||
|
strm.opaque = Z_NULL; |
||||||
|
|
||||||
|
ret = deflateInit(&strm, Z_DEFAULT_COMPRESSION); |
||||||
|
if (ret != Z_OK) |
||||||
|
return ret; |
||||||
|
|
||||||
|
strm.avail_out = CHUNK; |
||||||
|
strm.next_out = zbuf; |
||||||
|
|
||||||
|
ret = deflate_make_dht( &strm, lhist, dhist, &valid_bits ); |
||||||
|
assert(ret != Z_STREAM_ERROR); |
||||||
|
|
||||||
|
have = CHUNK - strm.avail_out; |
||||||
|
if (fwrite(zbuf, 1, have, cpbbin) != have || ferror(cpbbin)) { |
||||||
|
(void)deflateEnd(&strm); |
||||||
|
return Z_ERRNO; |
||||||
|
} |
||||||
|
fclose( cpbbin ); |
||||||
|
|
||||||
|
format_cpb( cpbtxt, zbuf, have, valid_bits ); |
||||||
|
|
||||||
|
fflush(stdout); |
||||||
|
fputs("--------------------------------\n", stdout ); |
||||||
|
fputs( cpbtxt, stdout ); |
||||||
|
fputs("--------------------------------\n", stdout ); |
||||||
|
fflush(stdout); |
||||||
|
|
||||||
|
(void)deflateEnd(&strm); |
||||||
|
|
||||||
|
return Z_OK; |
||||||
|
} |
||||||
|
|
||||||
|
/* report a zlib or i/o error */ |
||||||
|
void zerr(int ret) |
||||||
|
{ |
||||||
|
fputs("zpipe: ", stderr); |
||||||
|
switch (ret) { |
||||||
|
case Z_ERRNO: |
||||||
|
if (ferror(stdin)) |
||||||
|
fputs("error reading stdin\n", stderr); |
||||||
|
if (ferror(stdout)) |
||||||
|
fputs("error writing stdout\n", stderr); |
||||||
|
break; |
||||||
|
case Z_STREAM_ERROR: |
||||||
|
fputs("invalid compression level\n", stderr); |
||||||
|
break; |
||||||
|
case Z_DATA_ERROR: |
||||||
|
fputs("invalid or incomplete deflate data\n", stderr); |
||||||
|
break; |
||||||
|
case Z_MEM_ERROR: |
||||||
|
fputs("out of memory\n", stderr); |
||||||
|
break; |
||||||
|
case Z_VERSION_ERROR: |
||||||
|
fputs("zlib version mismatch!\n", stderr); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* Initialize zero lzcounts to a val. If the same DHT will be used
|
||||||
|
repeatedly by different input data, the DHT must contain a symbol |
||||||
|
for all possible input symbols. Changing zero counts to a nonzero |
||||||
|
count ensures that in the DHT there is a code for every symbol. Of |
||||||
|
course this comes at the expense of DHTs being larger */ |
||||||
|
|
||||||
|
void fill_zero_lzcounts(int *llhist, int *dhist, int val) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
for(i=0; i<286; i++)
|
||||||
|
if( ! llhist[i] )
|
||||||
|
llhist[i] = val; |
||||||
|
for(i=0; i<30; i++)
|
||||||
|
if( ! dhist[i] )
|
||||||
|
dhist[i] = val; |
||||||
|
} |
||||||
|
|
||||||
|
/* read lzcounts from file fname and write them to the int arrays
|
||||||
|
llhist and dhist for Lit/Len and Distance respectively */ |
||||||
|
|
||||||
|
int get_lzcounts(char *fname, int *llhist, int *dhist) |
||||||
|
{ |
||||||
|
int i, lz, prev_lz, count, doll; |
||||||
|
FILE *lzf; |
||||||
|
char buf[1024]; |
||||||
|
if( NULL == ( lzf = fopen( fname, "r" )) ) { |
||||||
|
fprintf( stderr, "error: cannot open %s\n", fname ); |
||||||
|
return 1; |
||||||
|
} |
||||||
|
for(i=0; i<286; i++)
|
||||||
|
llhist[i] = 0; |
||||||
|
for(i=0; i<30; i++)
|
||||||
|
dhist[i] = 0; |
||||||
|
prev_lz=0; |
||||||
|
doll=1; |
||||||
|
|
||||||
|
while( NULL != fgets( buf, 1023, lzf ) ) { |
||||||
|
sscanf( buf, "%d : %d", &lz, &count ); |
||||||
|
if( prev_lz > lz ) /* detect LL to D transition */ |
||||||
|
doll = 0; |
||||||
|
assert( (doll==1 && lz >= 0 && lz <= 285) || (doll==0 && lz >= 0 && lz <= 29 ) ); |
||||||
|
prev_lz = lz; |
||||||
|
if( doll ) |
||||||
|
llhist[ lz ] = count; |
||||||
|
else |
||||||
|
dhist[ lz ] = count; |
||||||
|
} |
||||||
|
llhist[256] = 1; /* The EOB symbol is always present */ |
||||||
|
fclose( lzf ); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
int main(int argc, char **argv) |
||||||
|
{ |
||||||
|
int ret; |
||||||
|
int lhist[286]; |
||||||
|
int dhist[30]; |
||||||
|
|
||||||
|
/* when -f argument is present */ |
||||||
|
if (argc == 4 && strcmp(argv[1], "-f") == 0) { |
||||||
|
|
||||||
|
/* read LZ counts from file */ |
||||||
|
if( get_lzcounts( argv[2], lhist, dhist ) ) |
||||||
|
return 1; |
||||||
|
|
||||||
|
/* change zero counts to one */ |
||||||
|
fill_zero_lzcounts( lhist, dhist, 1 ); |
||||||
|
|
||||||
|
/* make the dht */ |
||||||
|
ret = makedht( argv[3], lhist, dhist ); |
||||||
|
if (ret != Z_OK) |
||||||
|
zerr(ret); |
||||||
|
fflush(stderr); |
||||||
|
|
||||||
|
return ret; |
||||||
|
} |
||||||
|
/* no -f argument */ |
||||||
|
else if( argc == 3 ) { |
||||||
|
|
||||||
|
/* read LZ counts from file */ |
||||||
|
if( get_lzcounts( argv[1], lhist, dhist ) ) |
||||||
|
return 1; |
||||||
|
|
||||||
|
/* make the dht */ |
||||||
|
ret = makedht( argv[2], lhist, dhist ); |
||||||
|
if (ret != Z_OK) |
||||||
|
zerr(ret); |
||||||
|
fflush(stderr); |
||||||
|
|
||||||
|
return ret; |
||||||
|
} |
||||||
|
/* when argument count is wrong, report usage */ |
||||||
|
else { |
||||||
|
fprintf( stderr, "usage:\n"); |
||||||
|
fprintf( stderr, "%s [-f] <lzcount> <dht.bin>\n", argv[0]); |
||||||
|
fprintf( stderr, " <Lzcount> contains a symbol : count pair per line of input.\n"); |
||||||
|
fprintf( stderr, " Lit/Len symbols 0..285 must be followed by Distance symbols 0..29.\n"); |
||||||
|
fprintf( stderr, " Missing symbols have a count of 0 by default.\n"); |
||||||
|
fprintf( stderr, " The optional -f changes 0 counts to 1.\n"); |
||||||
|
fprintf( stderr, " Human readable output is printed to stdout.\n"); |
||||||
|
fprintf( stderr, " Number of bits in the DHT is printed in the first 16 bytes.\n"); |
||||||
|
fprintf( stderr, " Number of unused bits in the DHT tail byte is also printed.\n"); |
||||||
|
fprintf( stderr, " Binary output is dumped to dht.bin.\n");
|
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
} |
Loading…
Reference in new issue