parent
7751bd4c71
commit
67cc20d004
67 changed files with 2822 additions and 3019 deletions
@ -1,2 +1,5 @@ |
|||||||
all: |
all: |
||||||
-@echo "Use ./configure first. Thank you."
|
-@echo "Use ./configure first. Thank you."
|
||||||
|
|
||||||
|
distclean: |
||||||
|
make -f Makefile.in distclean
|
||||||
|
@ -1,43 +0,0 @@ |
|||||||
This is a patched version of zlib modified to use |
|
||||||
Pentium-optimized assembly code in the deflation algorithm. The files |
|
||||||
changed/added by this patch are: |
|
||||||
|
|
||||||
README.586 |
|
||||||
match.S |
|
||||||
|
|
||||||
The effectiveness of these modifications is a bit marginal, as the |
|
||||||
program's bottleneck seems to be mostly L1-cache contention, for which |
|
||||||
there is no real way to work around without rewriting the basic |
|
||||||
algorithm. The speedup on average is around 5-10% (which is generally |
|
||||||
less than the amount of variance between subsequent executions). |
|
||||||
However, when used at level 9 compression, the cache contention can |
|
||||||
drop enough for the assembly version to achieve 10-20% speedup (and |
|
||||||
sometimes more, depending on the amount of overall redundancy in the |
|
||||||
files). Even here, though, cache contention can still be the limiting |
|
||||||
factor, depending on the nature of the program using the zlib library. |
|
||||||
This may also mean that better improvements will be seen on a Pentium |
|
||||||
with MMX, which suffers much less from L1-cache contention, but I have |
|
||||||
not yet verified this. |
|
||||||
|
|
||||||
Note that this code has been tailored for the Pentium in particular, |
|
||||||
and will not perform well on the Pentium Pro (due to the use of a |
|
||||||
partial register in the inner loop). |
|
||||||
|
|
||||||
If you are using an assembler other than GNU as, you will have to |
|
||||||
translate match.S to use your assembler's syntax. (Have fun.) |
|
||||||
|
|
||||||
Brian Raiter |
|
||||||
breadbox@muppetlabs.com |
|
||||||
April, 1998 |
|
||||||
|
|
||||||
|
|
||||||
Added for zlib 1.1.3: |
|
||||||
|
|
||||||
The patches come from |
|
||||||
http://www.muppetlabs.com/~breadbox/software/assembly.html |
|
||||||
|
|
||||||
To compile zlib with this asm file, copy match.S to the zlib directory |
|
||||||
then do: |
|
||||||
|
|
||||||
CFLAGS="-O3 -DASMV" ./configure |
|
||||||
make OBJA=match.o |
|
@ -1,364 +0,0 @@ |
|||||||
/* match.s -- Pentium-optimized version of longest_match() |
|
||||||
* Written for zlib 1.1.2 |
|
||||||
* Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
|
|
||||||
* |
|
||||||
* This is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License. |
|
||||||
*/ |
|
||||||
|
|
||||||
#ifndef NO_UNDERLINE |
|
||||||
#define match_init _match_init |
|
||||||
#define longest_match _longest_match |
|
||||||
#endif |
|
||||||
|
|
||||||
#define MAX_MATCH (258) |
|
||||||
#define MIN_MATCH (3) |
|
||||||
#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) |
|
||||||
#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) |
|
||||||
|
|
||||||
/* stack frame offsets */ |
|
||||||
|
|
||||||
#define wmask 0 /* local copy of s->wmask */ |
|
||||||
#define window 4 /* local copy of s->window */ |
|
||||||
#define windowbestlen 8 /* s->window + bestlen */ |
|
||||||
#define chainlenscanend 12 /* high word: current chain len */ |
|
||||||
/* low word: last bytes sought */ |
|
||||||
#define scanstart 16 /* first two bytes of string */ |
|
||||||
#define scanalign 20 /* dword-misalignment of string */ |
|
||||||
#define nicematch 24 /* a good enough match size */ |
|
||||||
#define bestlen 28 /* size of best match so far */ |
|
||||||
#define scan 32 /* ptr to string wanting match */ |
|
||||||
|
|
||||||
#define LocalVarsSize (36) |
|
||||||
/* saved ebx 36 */ |
|
||||||
/* saved edi 40 */ |
|
||||||
/* saved esi 44 */ |
|
||||||
/* saved ebp 48 */ |
|
||||||
/* return address 52 */ |
|
||||||
#define deflatestate 56 /* the function arguments */ |
|
||||||
#define curmatch 60 |
|
||||||
|
|
||||||
/* Offsets for fields in the deflate_state structure. These numbers |
|
||||||
* are calculated from the definition of deflate_state, with the |
|
||||||
* assumption that the compiler will dword-align the fields. (Thus, |
|
||||||
* changing the definition of deflate_state could easily cause this |
|
||||||
* program to crash horribly, without so much as a warning at |
|
||||||
* compile time. Sigh.) |
|
||||||
*/ |
|
||||||
|
|
||||||
/* All the +zlib1222add offsets are due to the addition of fields |
|
||||||
* in zlib in the deflate_state structure since the asm code was first written |
|
||||||
* (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). |
|
||||||
* (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). |
|
||||||
* (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8"). |
|
||||||
*/ |
|
||||||
|
|
||||||
#define zlib1222add (8) |
|
||||||
|
|
||||||
#define dsWSize (36+zlib1222add) |
|
||||||
#define dsWMask (44+zlib1222add) |
|
||||||
#define dsWindow (48+zlib1222add) |
|
||||||
#define dsPrev (56+zlib1222add) |
|
||||||
#define dsMatchLen (88+zlib1222add) |
|
||||||
#define dsPrevMatch (92+zlib1222add) |
|
||||||
#define dsStrStart (100+zlib1222add) |
|
||||||
#define dsMatchStart (104+zlib1222add) |
|
||||||
#define dsLookahead (108+zlib1222add) |
|
||||||
#define dsPrevLen (112+zlib1222add) |
|
||||||
#define dsMaxChainLen (116+zlib1222add) |
|
||||||
#define dsGoodMatch (132+zlib1222add) |
|
||||||
#define dsNiceMatch (136+zlib1222add) |
|
||||||
|
|
||||||
|
|
||||||
.file "match.S" |
|
||||||
|
|
||||||
.globl match_init, longest_match |
|
||||||
|
|
||||||
.text |
|
||||||
|
|
||||||
/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ |
|
||||||
|
|
||||||
longest_match: |
|
||||||
|
|
||||||
/* Save registers that the compiler may be using, and adjust %esp to */ |
|
||||||
/* make room for our stack frame. */ |
|
||||||
|
|
||||||
pushl %ebp |
|
||||||
pushl %edi |
|
||||||
pushl %esi |
|
||||||
pushl %ebx |
|
||||||
subl $LocalVarsSize, %esp |
|
||||||
|
|
||||||
/* Retrieve the function arguments. %ecx will hold cur_match */ |
|
||||||
/* throughout the entire function. %edx will hold the pointer to the */ |
|
||||||
/* deflate_state structure during the function's setup (before */ |
|
||||||
/* entering the main loop). */ |
|
||||||
|
|
||||||
movl deflatestate(%esp), %edx |
|
||||||
movl curmatch(%esp), %ecx |
|
||||||
|
|
||||||
/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ |
|
||||||
|
|
||||||
movl dsNiceMatch(%edx), %eax |
|
||||||
movl dsLookahead(%edx), %ebx |
|
||||||
cmpl %eax, %ebx |
|
||||||
jl LookaheadLess |
|
||||||
movl %eax, %ebx |
|
||||||
LookaheadLess: movl %ebx, nicematch(%esp) |
|
||||||
|
|
||||||
/* register Bytef *scan = s->window + s->strstart; */ |
|
||||||
|
|
||||||
movl dsWindow(%edx), %esi |
|
||||||
movl %esi, window(%esp) |
|
||||||
movl dsStrStart(%edx), %ebp |
|
||||||
lea (%esi,%ebp), %edi |
|
||||||
movl %edi, scan(%esp) |
|
||||||
|
|
||||||
/* Determine how many bytes the scan ptr is off from being */ |
|
||||||
/* dword-aligned. */ |
|
||||||
|
|
||||||
movl %edi, %eax |
|
||||||
negl %eax |
|
||||||
andl $3, %eax |
|
||||||
movl %eax, scanalign(%esp) |
|
||||||
|
|
||||||
/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
|
||||||
/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
|
||||||
|
|
||||||
movl dsWSize(%edx), %eax |
|
||||||
subl $MIN_LOOKAHEAD, %eax |
|
||||||
subl %eax, %ebp |
|
||||||
jg LimitPositive |
|
||||||
xorl %ebp, %ebp |
|
||||||
LimitPositive: |
|
||||||
|
|
||||||
/* unsigned chain_length = s->max_chain_length; */ |
|
||||||
/* if (s->prev_length >= s->good_match) { */ |
|
||||||
/* chain_length >>= 2; */ |
|
||||||
/* } */ |
|
||||||
|
|
||||||
movl dsPrevLen(%edx), %eax |
|
||||||
movl dsGoodMatch(%edx), %ebx |
|
||||||
cmpl %ebx, %eax |
|
||||||
movl dsMaxChainLen(%edx), %ebx |
|
||||||
jl LastMatchGood |
|
||||||
shrl $2, %ebx |
|
||||||
LastMatchGood: |
|
||||||
|
|
||||||
/* chainlen is decremented once beforehand so that the function can */ |
|
||||||
/* use the sign flag instead of the zero flag for the exit test. */ |
|
||||||
/* It is then shifted into the high word, to make room for the scanend */ |
|
||||||
/* value, which it will always accompany. */ |
|
||||||
|
|
||||||
decl %ebx |
|
||||||
shll $16, %ebx |
|
||||||
|
|
||||||
/* int best_len = s->prev_length; */ |
|
||||||
|
|
||||||
movl dsPrevLen(%edx), %eax |
|
||||||
movl %eax, bestlen(%esp) |
|
||||||
|
|
||||||
/* Store the sum of s->window + best_len in windowbestlen locally, and in %esi. */ |
|
||||||
|
|
||||||
addl %eax, %esi |
|
||||||
movl %esi, windowbestlen(%esp) |
|
||||||
|
|
||||||
/* register ush scan_start = *(ushf*)scan; */ |
|
||||||
/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
|
||||||
|
|
||||||
movw (%edi), %bx |
|
||||||
movw %bx, scanstart(%esp) |
|
||||||
movw -1(%edi,%eax), %bx |
|
||||||
movl %ebx, chainlenscanend(%esp) |
|
||||||
|
|
||||||
/* Posf *prev = s->prev; */ |
|
||||||
/* uInt wmask = s->w_mask; */ |
|
||||||
|
|
||||||
movl dsPrev(%edx), %edi |
|
||||||
movl dsWMask(%edx), %edx |
|
||||||
mov %edx, wmask(%esp) |
|
||||||
|
|
||||||
/* Jump into the main loop. */ |
|
||||||
|
|
||||||
jmp LoopEntry |
|
||||||
|
|
||||||
.balign 16
|
|
||||||
|
|
||||||
/* do { |
|
||||||
* match = s->window + cur_match;
|
|
||||||
* if (*(ushf*)(match+best_len-1) != scan_end || |
|
||||||
* *(ushf*)match != scan_start) continue;
|
|
||||||
* [...] |
|
||||||
* } while ((cur_match = prev[cur_match & wmask]) > limit |
|
||||||
* && --chain_length != 0);
|
|
||||||
* |
|
||||||
* Here is the inner loop of the function. The function will spend the |
|
||||||
* majority of its time in this loop, and majority of that time will |
|
||||||
* be spent in the first ten instructions. |
|
||||||
* |
|
||||||
* Within this loop: |
|
||||||
* %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend) |
|
||||||
* %ecx = curmatch |
|
||||||
* %edx = curmatch & wmask |
|
||||||
* %esi = windowbestlen - i.e., (window + bestlen) |
|
||||||
* %edi = prev |
|
||||||
* %ebp = limit |
|
||||||
* |
|
||||||
* Two optimization notes on the choice of instructions: |
|
||||||
* |
|
||||||
* The first instruction uses a 16-bit address, which costs an extra, |
|
||||||
* unpairable cycle. This is cheaper than doing a 32-bit access and |
|
||||||
* zeroing the high word, due to the 3-cycle misalignment penalty which |
|
||||||
* would occur half the time. This also turns out to be cheaper than |
|
||||||
* doing two separate 8-bit accesses, as the memory is so rarely in the |
|
||||||
* L1 cache. |
|
||||||
* |
|
||||||
* The window buffer, however, apparently spends a lot of time in the |
|
||||||
* cache, and so it is faster to retrieve the word at the end of the |
|
||||||
* match string with two 8-bit loads. The instructions that test the |
|
||||||
* word at the beginning of the match string, however, are executed |
|
||||||
* much less frequently, and there it was cheaper to use 16-bit |
|
||||||
* instructions, which avoided the necessity of saving off and |
|
||||||
* subsequently reloading one of the other registers. |
|
||||||
*/ |
|
||||||
LookupLoop: |
|
||||||
/* 1 U & V */ |
|
||||||
movw (%edi,%edx,2), %cx /* 2 U pipe */ |
|
||||||
movl wmask(%esp), %edx /* 2 V pipe */ |
|
||||||
cmpl %ebp, %ecx /* 3 U pipe */ |
|
||||||
jbe LeaveNow /* 3 V pipe */ |
|
||||||
subl $0x00010000, %ebx /* 4 U pipe */ |
|
||||||
js LeaveNow /* 4 V pipe */ |
|
||||||
LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */ |
|
||||||
andl %ecx, %edx /* 5 V pipe */ |
|
||||||
cmpb %bl, %al /* 6 U pipe */ |
|
||||||
jnz LookupLoop /* 6 V pipe */ |
|
||||||
movb (%esi,%ecx), %ah |
|
||||||
cmpb %bh, %ah |
|
||||||
jnz LookupLoop |
|
||||||
movl window(%esp), %eax |
|
||||||
movw (%eax,%ecx), %ax |
|
||||||
cmpw scanstart(%esp), %ax |
|
||||||
jnz LookupLoop |
|
||||||
|
|
||||||
/* Store the current value of chainlen. */ |
|
||||||
|
|
||||||
movl %ebx, chainlenscanend(%esp) |
|
||||||
|
|
||||||
/* Point %edi to the string under scrutiny, and %esi to the string we */ |
|
||||||
/* are hoping to match it up with. In actuality, %esi and %edi are */ |
|
||||||
/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ |
|
||||||
/* initialized to -(MAX_MATCH_8 - scanalign). */ |
|
||||||
|
|
||||||
movl window(%esp), %esi |
|
||||||
movl scan(%esp), %edi |
|
||||||
addl %ecx, %esi |
|
||||||
movl scanalign(%esp), %eax |
|
||||||
movl $(-MAX_MATCH_8), %edx |
|
||||||
lea MAX_MATCH_8(%edi,%eax), %edi |
|
||||||
lea MAX_MATCH_8(%esi,%eax), %esi |
|
||||||
|
|
||||||
/* Test the strings for equality, 8 bytes at a time. At the end, |
|
||||||
* adjust %edx so that it is offset to the exact byte that mismatched. |
|
||||||
* |
|
||||||
* We already know at this point that the first three bytes of the |
|
||||||
* strings match each other, and they can be safely passed over before |
|
||||||
* starting the compare loop. So what this code does is skip over 0-3 |
|
||||||
* bytes, as much as necessary in order to dword-align the %edi |
|
||||||
* pointer. (%esi will still be misaligned three times out of four.) |
|
||||||
* |
|
||||||
* It should be confessed that this loop usually does not represent |
|
||||||
* much of the total running time. Replacing it with a more |
|
||||||
* straightforward "rep cmpsb" would not drastically degrade |
|
||||||
* performance. |
|
||||||
*/ |
|
||||||
LoopCmps: |
|
||||||
movl (%esi,%edx), %eax |
|
||||||
movl (%edi,%edx), %ebx |
|
||||||
xorl %ebx, %eax |
|
||||||
jnz LeaveLoopCmps |
|
||||||
movl 4(%esi,%edx), %eax |
|
||||||
movl 4(%edi,%edx), %ebx |
|
||||||
xorl %ebx, %eax |
|
||||||
jnz LeaveLoopCmps4 |
|
||||||
addl $8, %edx |
|
||||||
jnz LoopCmps |
|
||||||
jmp LenMaximum |
|
||||||
LeaveLoopCmps4: addl $4, %edx |
|
||||||
LeaveLoopCmps: testl $0x0000FFFF, %eax |
|
||||||
jnz LenLower |
|
||||||
addl $2, %edx |
|
||||||
shrl $16, %eax |
|
||||||
LenLower: subb $1, %al |
|
||||||
adcl $0, %edx |
|
||||||
|
|
||||||
/* Calculate the length of the match. If it is at least MAX_MATCH, */ |
|
||||||
/* then automatically accept it as the best possible match and leave. */ |
|
||||||
|
|
||||||
lea (%edi,%edx), %eax |
|
||||||
movl scan(%esp), %edi |
|
||||||
subl %edi, %eax |
|
||||||
cmpl $MAX_MATCH, %eax |
|
||||||
jge LenMaximum |
|
||||||
|
|
||||||
/* If the length of the match is not longer than the best match we */ |
|
||||||
/* have so far, then forget it and return to the lookup loop. */ |
|
||||||
|
|
||||||
movl deflatestate(%esp), %edx |
|
||||||
movl bestlen(%esp), %ebx |
|
||||||
cmpl %ebx, %eax |
|
||||||
jg LongerMatch |
|
||||||
movl chainlenscanend(%esp), %ebx |
|
||||||
movl windowbestlen(%esp), %esi |
|
||||||
movl dsPrev(%edx), %edi |
|
||||||
movl wmask(%esp), %edx |
|
||||||
andl %ecx, %edx |
|
||||||
jmp LookupLoop |
|
||||||
|
|
||||||
/* s->match_start = cur_match; */ |
|
||||||
/* best_len = len; */ |
|
||||||
/* if (len >= nice_match) break; */ |
|
||||||
/* scan_end = *(ushf*)(scan+best_len-1); */ |
|
||||||
|
|
||||||
LongerMatch: movl nicematch(%esp), %ebx |
|
||||||
movl %eax, bestlen(%esp) |
|
||||||
movl %ecx, dsMatchStart(%edx) |
|
||||||
cmpl %ebx, %eax |
|
||||||
jge LeaveNow |
|
||||||
movl window(%esp), %esi |
|
||||||
addl %eax, %esi |
|
||||||
movl %esi, windowbestlen(%esp) |
|
||||||
movl chainlenscanend(%esp), %ebx |
|
||||||
movw -1(%edi,%eax), %bx |
|
||||||
movl dsPrev(%edx), %edi |
|
||||||
movl %ebx, chainlenscanend(%esp) |
|
||||||
movl wmask(%esp), %edx |
|
||||||
andl %ecx, %edx |
|
||||||
jmp LookupLoop |
|
||||||
|
|
||||||
/* Accept the current string, with the maximum possible length. */ |
|
||||||
|
|
||||||
LenMaximum: movl deflatestate(%esp), %edx |
|
||||||
movl $MAX_MATCH, bestlen(%esp) |
|
||||||
movl %ecx, dsMatchStart(%edx) |
|
||||||
|
|
||||||
/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
|
||||||
/* return s->lookahead; */ |
|
||||||
|
|
||||||
LeaveNow: |
|
||||||
movl deflatestate(%esp), %edx |
|
||||||
movl bestlen(%esp), %ebx |
|
||||||
movl dsLookahead(%edx), %eax |
|
||||||
cmpl %eax, %ebx |
|
||||||
jg LookaheadRet |
|
||||||
movl %ebx, %eax |
|
||||||
LookaheadRet: |
|
||||||
|
|
||||||
/* Restore the stack and return from whence we came. */ |
|
||||||
|
|
||||||
addl $LocalVarsSize, %esp |
|
||||||
popl %ebx |
|
||||||
popl %esi |
|
||||||
popl %edi |
|
||||||
popl %ebp |
|
||||||
match_init: ret |
|
@ -0,0 +1,574 @@ |
|||||||
|
/* |
||||||
|
;uInt longest_match_x64(
|
||||||
|
; deflate_state *s,
|
||||||
|
; IPos cur_match); // current match
|
||||||
|
|
||||||
|
; gvmat64.S -- Asm portion of the optimized longest_match for 64 bits x86_64
|
||||||
|
; (AMD64 on Athlon 64, Opteron, Phenom
|
||||||
|
; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7)
|
||||||
|
; this file is a translation of gvmat64.asm to GCC 4.x (for Linux, Mac XCode)
|
||||||
|
; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
|
||||||
|
;
|
||||||
|
; File written by Gilles Vollant, by converting to assembly the longest_match
|
||||||
|
; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
|
||||||
|
; and by taking inspiration on asm686 with masm, optimised assembly code
|
||||||
|
; from Brian Raiter, written 1998
|
||||||
|
;
|
||||||
|
; This software is provided 'as-is', without any express or implied
|
||||||
|
; warranty. In no event will the authors be held liable for any damages
|
||||||
|
; arising from the use of this software.
|
||||||
|
;
|
||||||
|
; Permission is granted to anyone to use this software for any purpose,
|
||||||
|
; including commercial applications, and to alter it and redistribute it
|
||||||
|
; freely, subject to the following restrictions:
|
||||||
|
;
|
||||||
|
; 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
; claim that you wrote the original software. If you use this software
|
||||||
|
; in a product, an acknowledgment in the product documentation would be
|
||||||
|
; appreciated but is not required.
|
||||||
|
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
; misrepresented as being the original software
|
||||||
|
; 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
;
|
||||||
|
; http://www.zlib.net
|
||||||
|
; http://www.winimage.com/zLibDll
|
||||||
|
; http://www.muppetlabs.com/~breadbox/software/assembly.html
|
||||||
|
;
|
||||||
|
; to compile this file for zLib, I use option:
|
||||||
|
; gcc -c -arch x86_64 gvmat64.S
|
||||||
|
|
||||||
|
|
||||||
|
;uInt longest_match(s, cur_match)
|
||||||
|
; deflate_state *s;
|
||||||
|
; IPos cur_match; // current match /
|
||||||
|
;
|
||||||
|
; with XCode for Mac, I had strange errors with some jumps in intel syntax
|
||||||
|
; this is why BEFORE_JMP and AFTER_JMP are used
|
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
#define BEFORE_JMP .att_syntax |
||||||
|
#define AFTER_JMP .intel_syntax noprefix |
||||||
|
|
||||||
|
#ifndef NO_UNDERLINE |
||||||
|
# define match_init _match_init |
||||||
|
# define longest_match _longest_match |
||||||
|
#endif |
||||||
|
|
||||||
|
.intel_syntax noprefix
|
||||||
|
|
||||||
|
.globl match_init, longest_match |
||||||
|
.text |
||||||
|
longest_match: |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define LocalVarsSize 96 |
||||||
|
/* |
||||||
|
; register used : rax,rbx,rcx,rdx,rsi,rdi,rbp,r8,r9,r10,r11,r12,r13
|
||||||
|
; free register : r14,r15
|
||||||
|
; register can be saved : rsp
|
||||||
|
*/ |
||||||
|
|
||||||
|
#define chainlenwmask (rsp + 8 - LocalVarsSize) |
||||||
|
#define nicematch (rsp + 16 - LocalVarsSize) |
||||||
|
|
||||||
|
#define save_rdi (rsp + 24 - LocalVarsSize) |
||||||
|
#define save_rsi (rsp + 32 - LocalVarsSize) |
||||||
|
#define save_rbx (rsp + 40 - LocalVarsSize) |
||||||
|
#define save_rbp (rsp + 48 - LocalVarsSize) |
||||||
|
#define save_r12 (rsp + 56 - LocalVarsSize) |
||||||
|
#define save_r13 (rsp + 64 - LocalVarsSize) |
||||||
|
#define save_r14 (rsp + 72 - LocalVarsSize) |
||||||
|
#define save_r15 (rsp + 80 - LocalVarsSize) |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
; all the +4 offsets are due to the addition of pending_buf_size to zlib,
|
||||||
|
; in the deflate_state structure since the asm code was first written
|
||||||
|
; (if you compile with zlib 1.0.4 or older, remove the +4).
|
||||||
|
; Note : these values are valid for a structure packed on 8-byte boundaries
|
||||||
|
*/ |
||||||
|
|
||||||
|
#define MAX_MATCH 258 |
||||||
|
#define MIN_MATCH 3 |
||||||
|
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) |
||||||
|
|
||||||
|
/* |
||||||
|
;;; Offsets for fields in the deflate_state structure. These numbers
|
||||||
|
;;; are calculated from the definition of deflate_state, with the
|
||||||
|
;;; assumption that the compiler will dword-align the fields. (Thus,
|
||||||
|
;;; changing the definition of deflate_state could easily cause this
|
||||||
|
;;; program to crash horribly, without so much as a warning at
|
||||||
|
;;; compile time. Sigh.)
|
||||||
|
|
||||||
|
; all the +zlib1222add offsets are due to the addition of fields
|
||||||
|
; in zlib in the deflate_state structure since the asm code was first written
|
||||||
|
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
|
||||||
|
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
|
||||||
|
; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
|
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* you can check the structure offset by running |
||||||
|
|
||||||
|
#include <stdlib.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include "deflate.h" |
||||||
|
|
||||||
|
void print_depl() |
||||||
|
{ |
||||||
|
deflate_state ds;
|
||||||
|
deflate_state *s=&ds;
|
||||||
|
printf("size pointer=%u\n",(int)sizeof(void*));
|
||||||
|
|
||||||
|
printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s))); |
||||||
|
printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s))); |
||||||
|
printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s))); |
||||||
|
printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s))); |
||||||
|
printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s))); |
||||||
|
printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s))); |
||||||
|
printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s))); |
||||||
|
printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s))); |
||||||
|
printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s))); |
||||||
|
printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s))); |
||||||
|
printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); |
||||||
|
printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s))); |
||||||
|
printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s))); |
||||||
|
} |
||||||
|
*/ |
||||||
|
|
||||||
|
#define dsWSize 68 |
||||||
|
#define dsWMask 76 |
||||||
|
#define dsWindow 80 |
||||||
|
#define dsPrev 96 |
||||||
|
#define dsMatchLen 144 |
||||||
|
#define dsPrevMatch 148 |
||||||
|
#define dsStrStart 156 |
||||||
|
#define dsMatchStart 160 |
||||||
|
#define dsLookahead 164 |
||||||
|
#define dsPrevLen 168 |
||||||
|
#define dsMaxChainLen 172 |
||||||
|
#define dsGoodMatch 188 |
||||||
|
#define dsNiceMatch 192 |
||||||
|
|
||||||
|
#define window_size [ rcx + dsWSize] |
||||||
|
#define WMask [ rcx + dsWMask] |
||||||
|
#define window_ad [ rcx + dsWindow] |
||||||
|
#define prev_ad [ rcx + dsPrev] |
||||||
|
#define strstart [ rcx + dsStrStart] |
||||||
|
#define match_start [ rcx + dsMatchStart] |
||||||
|
#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip
|
||||||
|
#define prev_length [ rcx + dsPrevLen] |
||||||
|
#define max_chain_length [ rcx + dsMaxChainLen] |
||||||
|
#define good_match [ rcx + dsGoodMatch] |
||||||
|
#define nice_match [ rcx + dsNiceMatch] |
||||||
|
|
||||||
|
/* |
||||||
|
; windows:
|
||||||
|
; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match)
|
||||||
|
|
||||||
|
; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
|
||||||
|
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
|
||||||
|
;
|
||||||
|
; All registers must be preserved across the call, except for
|
||||||
|
; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
|
||||||
|
|
||||||
|
;
|
||||||
|
; gcc on macosx-linux:
|
||||||
|
; see http://www.x86-64.org/documentation/abi-0.99.pdf
|
||||||
|
; param 1 in rdi, param 2 in rsi
|
||||||
|
; rbx, rsp, rbp, r12 to r15 must be preserved
|
||||||
|
|
||||||
|
;;; Save registers that the compiler may be using, and adjust esp to
|
||||||
|
;;; make room for our stack frame.
|
||||||
|
|
||||||
|
|
||||||
|
;;; Retrieve the function arguments. r8d will hold cur_match
|
||||||
|
;;; throughout the entire function. rcx will hold the pointer to the
|
||||||
|
;;; deflate_state structure during the function's setup (before
|
||||||
|
;;; entering the main loop).
|
||||||
|
|
||||||
|
; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)
|
||||||
|
; mac: param 1 in rdi, param 2 rsi
|
||||||
|
; this clears the high 32 bits of r8, which can be garbage in both r8 and rdx
|
||||||
|
*/ |
||||||
|
mov [save_rbx],rbx |
||||||
|
mov [save_rbp],rbp |
||||||
|
|
||||||
|
|
||||||
|
mov rcx,rdi |
||||||
|
|
||||||
|
mov r8d,esi |
||||||
|
|
||||||
|
|
||||||
|
mov [save_r12],r12 |
||||||
|
mov [save_r13],r13 |
||||||
|
mov [save_r14],r14 |
||||||
|
mov [save_r15],r15 |
||||||
|
|
||||||
|
|
||||||
|
//;;; uInt wmask = s->w_mask;
|
||||||
|
//;;; unsigned chain_length = s->max_chain_length;
|
||||||
|
//;;; if (s->prev_length >= s->good_match) {
|
||||||
|
//;;; chain_length >>= 2;
|
||||||
|
//;;; }
|
||||||
|
|
||||||
|
|
||||||
|
mov edi, prev_length |
||||||
|
mov esi, good_match |
||||||
|
mov eax, WMask |
||||||
|
mov ebx, max_chain_length |
||||||
|
cmp edi, esi |
||||||
|
jl LastMatchGood |
||||||
|
shr ebx, 2 |
||||||
|
LastMatchGood: |
||||||
|
|
||||||
|
//;;; chainlen is decremented once beforehand so that the function can
|
||||||
|
//;;; use the sign flag instead of the zero flag for the exit test.
|
||||||
|
//;;; It is then shifted into the high word, to make room for the wmask
|
||||||
|
//;;; value, which it will always accompany.
|
||||||
|
|
||||||
|
dec ebx |
||||||
|
shl ebx, 16 |
||||||
|
or ebx, eax |
||||||
|
|
||||||
|
//;;; on zlib only
|
||||||
|
//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
mov eax, nice_match |
||||||
|
mov [chainlenwmask], ebx |
||||||
|
mov r10d, Lookahead |
||||||
|
cmp r10d, eax |
||||||
|
cmovnl r10d, eax |
||||||
|
mov [nicematch],r10d |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//;;; register Bytef *scan = s->window + s->strstart;
|
||||||
|
mov r10, window_ad |
||||||
|
mov ebp, strstart |
||||||
|
lea r13, [r10 + rbp] |
||||||
|
|
||||||
|
//;;; Determine how many bytes the scan ptr is off from being
|
||||||
|
//;;; dword-aligned.
|
||||||
|
|
||||||
|
mov r9,r13 |
||||||
|
neg r13 |
||||||
|
and r13,3 |
||||||
|
|
||||||
|
//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
|
||||||
|
//;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
|
||||||
|
|
||||||
|
|
||||||
|
mov eax, window_size |
||||||
|
sub eax, MIN_LOOKAHEAD |
||||||
|
|
||||||
|
|
||||||
|
xor edi,edi |
||||||
|
sub ebp, eax |
||||||
|
|
||||||
|
mov r11d, prev_length |
||||||
|
|
||||||
|
cmovng ebp,edi |
||||||
|
|
||||||
|
//;;; int best_len = s->prev_length;
|
||||||
|
|
||||||
|
|
||||||
|
//;;; Store the sum of s->window + best_len in rsi.
|
||||||
|
|
||||||
|
lea rsi,[r10+r11] |
||||||
|
|
||||||
|
//;;; register ush scan_start = *(ushf*)scan;
|
||||||
|
//;;; register ush scan_end = *(ushf*)(scan+best_len-1);
|
||||||
|
//;;; Posf *prev = s->prev;
|
||||||
|
|
||||||
|
movzx r12d,word ptr [r9] |
||||||
|
movzx ebx, word ptr [r9 + r11 - 1] |
||||||
|
|
||||||
|
mov rdi, prev_ad |
||||||
|
|
||||||
|
//;;; Jump into the main loop.
|
||||||
|
|
||||||
|
mov edx, [chainlenwmask] |
||||||
|
|
||||||
|
cmp bx,word ptr [rsi + r8 - 1] |
||||||
|
jz LookupLoopIsZero |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
LookupLoop1: |
||||||
|
and r8d, edx |
||||||
|
|
||||||
|
movzx r8d, word ptr [rdi + r8*2] |
||||||
|
cmp r8d, ebp |
||||||
|
jbe LeaveNow |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
sub edx, 0x00010000 |
||||||
|
BEFORE_JMP |
||||||
|
js LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LoopEntry1: |
||||||
|
cmp bx,word ptr [rsi + r8 - 1] |
||||||
|
BEFORE_JMP |
||||||
|
jz LookupLoopIsZero |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LookupLoop2: |
||||||
|
and r8d, edx |
||||||
|
|
||||||
|
movzx r8d, word ptr [rdi + r8*2] |
||||||
|
cmp r8d, ebp |
||||||
|
BEFORE_JMP |
||||||
|
jbe LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
sub edx, 0x00010000 |
||||||
|
BEFORE_JMP |
||||||
|
js LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LoopEntry2: |
||||||
|
cmp bx,word ptr [rsi + r8 - 1] |
||||||
|
BEFORE_JMP |
||||||
|
jz LookupLoopIsZero |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LookupLoop4: |
||||||
|
and r8d, edx |
||||||
|
|
||||||
|
movzx r8d, word ptr [rdi + r8*2] |
||||||
|
cmp r8d, ebp |
||||||
|
BEFORE_JMP |
||||||
|
jbe LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
sub edx, 0x00010000 |
||||||
|
BEFORE_JMP |
||||||
|
js LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LoopEntry4: |
||||||
|
|
||||||
|
cmp bx,word ptr [rsi + r8 - 1] |
||||||
|
BEFORE_JMP |
||||||
|
jnz LookupLoop1 |
||||||
|
jmp LookupLoopIsZero |
||||||
|
AFTER_JMP |
||||||
|
/* |
||||||
|
;;; do {
|
||||||
|
;;; match = s->window + cur_match;
|
||||||
|
;;; if (*(ushf*)(match+best_len-1) != scan_end ||
|
||||||
|
;;; *(ushf*)match != scan_start) continue;
|
||||||
|
;;; [...]
|
||||||
|
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
|
||||||
|
;;; && --chain_length != 0);
|
||||||
|
;;;
|
||||||
|
;;; Here is the inner loop of the function. The function will spend the
|
||||||
|
;;; majority of its time in this loop, and majority of that time will
|
||||||
|
;;; be spent in the first ten instructions.
|
||||||
|
;;;
|
||||||
|
;;; Within this loop:
|
||||||
|
;;; ebx = scanend
|
||||||
|
;;; r8d = curmatch
|
||||||
|
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
|
||||||
|
;;; esi = windowbestlen - i.e., (window + bestlen)
|
||||||
|
;;; edi = prev
|
||||||
|
;;; ebp = limit
|
||||||
|
*/ |
||||||
|
.balign 16
|
||||||
|
LookupLoop: |
||||||
|
and r8d, edx |
||||||
|
|
||||||
|
movzx r8d, word ptr [rdi + r8*2] |
||||||
|
cmp r8d, ebp |
||||||
|
BEFORE_JMP |
||||||
|
jbe LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
sub edx, 0x00010000 |
||||||
|
BEFORE_JMP |
||||||
|
js LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LoopEntry: |
||||||
|
|
||||||
|
cmp bx,word ptr [rsi + r8 - 1] |
||||||
|
BEFORE_JMP |
||||||
|
jnz LookupLoop1 |
||||||
|
AFTER_JMP |
||||||
|
LookupLoopIsZero: |
||||||
|
cmp r12w, word ptr [r10 + r8] |
||||||
|
BEFORE_JMP |
||||||
|
jnz LookupLoop1 |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
|
||||||
|
//;;; Store the current value of chainlen.
|
||||||
|
mov [chainlenwmask], edx |
||||||
|
/* |
||||||
|
;;; Point edi to the string under scrutiny, and esi to the string we
|
||||||
|
;;; are hoping to match it up with. In actuality, esi and edi are
|
||||||
|
;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
|
||||||
|
;;; initialized to -(MAX_MATCH_8 - scanalign).
|
||||||
|
*/ |
||||||
|
lea rsi,[r8+r10] |
||||||
|
mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8)
|
||||||
|
lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8]
|
||||||
|
lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8]
|
||||||
|
|
||||||
|
prefetcht1 [rsi+rdx] |
||||||
|
prefetcht1 [rdi+rdx] |
||||||
|
|
||||||
|
/* |
||||||
|
;;; Test the strings for equality, 8 bytes at a time. At the end,
|
||||||
|
;;; adjust rdx so that it is offset to the exact byte that mismatched.
|
||||||
|
;;;
|
||||||
|
;;; We already know at this point that the first three bytes of the
|
||||||
|
;;; strings match each other, and they can be safely passed over before
|
||||||
|
;;; starting the compare loop. So what this code does is skip over 0-3
|
||||||
|
;;; bytes, as much as necessary in order to dword-align the edi
|
||||||
|
;;; pointer. (rsi will still be misaligned three times out of four.)
|
||||||
|
;;;
|
||||||
|
;;; It should be confessed that this loop usually does not represent
|
||||||
|
;;; much of the total running time. Replacing it with a more
|
||||||
|
;;; straightforward "rep cmpsb" would not drastically degrade
|
||||||
|
;;; performance.
|
||||||
|
*/ |
||||||
|
|
||||||
|
LoopCmps: |
||||||
|
mov rax, [rsi + rdx] |
||||||
|
xor rax, [rdi + rdx] |
||||||
|
jnz LeaveLoopCmps |
||||||
|
|
||||||
|
mov rax, [rsi + rdx + 8] |
||||||
|
xor rax, [rdi + rdx + 8] |
||||||
|
jnz LeaveLoopCmps8 |
||||||
|
|
||||||
|
|
||||||
|
mov rax, [rsi + rdx + 8+8] |
||||||
|
xor rax, [rdi + rdx + 8+8] |
||||||
|
jnz LeaveLoopCmps16 |
||||||
|
|
||||||
|
add rdx,8+8+8 |
||||||
|
|
||||||
|
BEFORE_JMP |
||||||
|
jnz LoopCmps |
||||||
|
jmp LenMaximum |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LeaveLoopCmps16: add rdx,8 |
||||||
|
LeaveLoopCmps8: add rdx,8 |
||||||
|
LeaveLoopCmps: |
||||||
|
|
||||||
|
test eax, 0x0000FFFF |
||||||
|
jnz LenLower |
||||||
|
|
||||||
|
test eax,0xffffffff |
||||||
|
|
||||||
|
jnz LenLower32 |
||||||
|
|
||||||
|
add rdx,4 |
||||||
|
shr rax,32 |
||||||
|
or ax,ax |
||||||
|
BEFORE_JMP |
||||||
|
jnz LenLower |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
LenLower32: |
||||||
|
shr eax,16 |
||||||
|
add rdx,2 |
||||||
|
|
||||||
|
LenLower:
|
||||||
|
sub al, 1 |
||||||
|
adc rdx, 0 |
||||||
|
//;;; Calculate the length of the match. If it is longer than MAX_MATCH,
|
||||||
|
//;;; then automatically accept it as the best possible match and leave.
|
||||||
|
|
||||||
|
lea rax, [rdi + rdx] |
||||||
|
sub rax, r9 |
||||||
|
cmp eax, MAX_MATCH |
||||||
|
BEFORE_JMP |
||||||
|
jge LenMaximum |
||||||
|
AFTER_JMP |
||||||
|
/* |
||||||
|
;;; If the length of the match is not longer than the best match we
|
||||||
|
;;; have so far, then forget it and return to the lookup loop.
|
||||||
|
;///////////////////////////////////
|
||||||
|
*/ |
||||||
|
cmp eax, r11d |
||||||
|
jg LongerMatch |
||||||
|
|
||||||
|
lea rsi,[r10+r11] |
||||||
|
|
||||||
|
mov rdi, prev_ad |
||||||
|
mov edx, [chainlenwmask] |
||||||
|
BEFORE_JMP |
||||||
|
jmp LookupLoop |
||||||
|
AFTER_JMP |
||||||
|
/* |
||||||
|
;;; s->match_start = cur_match;
|
||||||
|
;;; best_len = len;
|
||||||
|
;;; if (len >= nice_match) break;
|
||||||
|
;;; scan_end = *(ushf*)(scan+best_len-1);
|
||||||
|
*/ |
||||||
|
LongerMatch: |
||||||
|
mov r11d, eax |
||||||
|
mov match_start, r8d |
||||||
|
cmp eax, [nicematch] |
||||||
|
BEFORE_JMP |
||||||
|
jge LeaveNow |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
lea rsi,[r10+rax] |
||||||
|
|
||||||
|
movzx ebx, word ptr [r9 + rax - 1] |
||||||
|
mov rdi, prev_ad |
||||||
|
mov edx, [chainlenwmask] |
||||||
|
BEFORE_JMP |
||||||
|
jmp LookupLoop |
||||||
|
AFTER_JMP |
||||||
|
|
||||||
|
//;;; Accept the current string, with the maximum possible length.
|
||||||
|
|
||||||
|
LenMaximum: |
||||||
|
mov r11d,MAX_MATCH |
||||||
|
mov match_start, r8d |
||||||
|
|
||||||
|
//;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
|
||||||
|
//;;; return s->lookahead;
|
||||||
|
|
||||||
|
LeaveNow: |
||||||
|
mov eax, Lookahead |
||||||
|
cmp r11d, eax |
||||||
|
cmovng eax, r11d |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//;;; Restore the stack and return from whence we came.
|
||||||
|
|
||||||
|
|
||||||
|
// mov rsi,[save_rsi] |
||||||
|
// mov rdi,[save_rdi] |
||||||
|
mov rbx,[save_rbx] |
||||||
|
mov rbp,[save_rbp] |
||||||
|
mov r12,[save_r12] |
||||||
|
mov r13,[save_r13] |
||||||
|
mov r14,[save_r14] |
||||||
|
mov r15,[save_r15] |
||||||
|
|
||||||
|
|
||||||
|
ret 0 |
||||||
|
//; please don't remove this string !
|
||||||
|
//; You can freely use gvmat64 in any free or commercial app |
|
||||||
|
//; but it is far better not to remove the string from the binary! |
|
||||||
|
// db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 |
||||||
|
|
||||||
|
|
||||||
|
match_init: |
||||||
|
ret 0 |
||||||
|
|
||||||
|
|
@ -1,413 +0,0 @@ |
|||||||
|
|
||||||
; match.asm -- Pentium-Pro optimized version of longest_match() |
|
||||||
; |
|
||||||
; Updated for zlib 1.1.3 and converted to MASM 6.1x |
|
||||||
; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> |
|
||||||
; and Chuck Walbourn <chuckw@kinesoft.com> |
|
||||||
; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> |
|
||||||
; |
|
||||||
; This is free software; you can redistribute it and/or modify it |
|
||||||
; under the terms of the GNU General Public License. |
|
||||||
|
|
||||||
; Based on match.S |
|
||||||
; Written for zlib 1.1.2 |
|
||||||
; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
|
||||||
; |
|
||||||
; Modified by Gilles Vollant (2005) to add gzhead and gzindex |
|
||||||
|
|
||||||
.686P |
|
||||||
.MODEL FLAT |
|
||||||
|
|
||||||
;=========================================================================== |
|
||||||
; EQUATES |
|
||||||
;=========================================================================== |
|
||||||
|
|
||||||
MAX_MATCH EQU 258 |
|
||||||
MIN_MATCH EQU 3 |
|
||||||
MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) |
|
||||||
MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) |
|
||||||
|
|
||||||
;=========================================================================== |
|
||||||
; STRUCTURES |
|
||||||
;=========================================================================== |
|
||||||
|
|
||||||
; This STRUCT assumes a 4-byte alignment |
|
||||||
|
|
||||||
DEFLATE_STATE STRUCT |
|
||||||
ds_strm dd ? |
|
||||||
ds_status dd ? |
|
||||||
ds_pending_buf dd ? |
|
||||||
ds_pending_buf_size dd ? |
|
||||||
ds_pending_out dd ? |
|
||||||
ds_pending dd ? |
|
||||||
ds_wrap dd ? |
|
||||||
; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) |
|
||||||
ds_gzhead dd ? |
|
||||||
ds_gzindex dd ? |
|
||||||
ds_data_type db ? |
|
||||||
ds_method db ? |
|
||||||
db ? ; padding |
|
||||||
db ? ; padding |
|
||||||
ds_last_flush dd ? |
|
||||||
ds_w_size dd ? ; used |
|
||||||
ds_w_bits dd ? |
|
||||||
ds_w_mask dd ? ; used |
|
||||||
ds_window dd ? ; used |
|
||||||
ds_window_size dd ? |
|
||||||
ds_prev dd ? ; used |
|
||||||
ds_head dd ? |
|
||||||
ds_ins_h dd ? |
|
||||||
ds_hash_size dd ? |
|
||||||
ds_hash_bits dd ? |
|
||||||
ds_hash_mask dd ? |
|
||||||
ds_hash_shift dd ? |
|
||||||
ds_block_start dd ? |
|
||||||
ds_match_length dd ? ; used |
|
||||||
ds_prev_match dd ? ; used |
|
||||||
ds_match_available dd ? |
|
||||||
ds_strstart dd ? ; used |
|
||||||
ds_match_start dd ? ; used |
|
||||||
ds_lookahead dd ? ; used |
|
||||||
ds_prev_length dd ? ; used |
|
||||||
ds_max_chain_length dd ? ; used |
|
||||||
ds_max_laxy_match dd ? |
|
||||||
ds_level dd ? |
|
||||||
ds_strategy dd ? |
|
||||||
ds_good_match dd ? ; used |
|
||||||
ds_nice_match dd ? ; used |
|
||||||
|
|
||||||
; No more of the struct is needed for match |
|
||||||
DEFLATE_STATE ENDS |
|
||||||
|
|
||||||
;=========================================================================== |
|
||||||
; CODE |
|
||||||
;=========================================================================== |
|
||||||
_TEXT SEGMENT |
|
||||||
|
|
||||||
;--------------------------------------------------------------------------- |
|
||||||
; match_init |
|
||||||
;--------------------------------------------------------------------------- |
|
||||||
ALIGN 4 |
|
||||||
PUBLIC _match_init |
|
||||||
_match_init PROC |
|
||||||
; no initialization needed |
|
||||||
ret |
|
||||||
_match_init ENDP |
|
||||||
|
|
||||||
;--------------------------------------------------------------------------- |
|
||||||
; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
|
||||||
;--------------------------------------------------------------------------- |
|
||||||
ALIGN 4 |
|
||||||
|
|
||||||
PUBLIC _longest_match |
|
||||||
_longest_match PROC |
|
||||||
|
|
||||||
; Since this code uses EBP for a scratch register, the stack frame must |
|
||||||
; be manually constructed and referenced relative to the ESP register. |
|
||||||
|
|
||||||
; Stack image |
|
||||||
; Variables |
|
||||||
chainlenwmask = 0 ; high word: current chain len |
|
||||||
; low word: s->wmask |
|
||||||
window = 4 ; local copy of s->window |
|
||||||
windowbestlen = 8 ; s->window + bestlen |
|
||||||
scanend = 12 ; last two bytes of string |
|
||||||
scanstart = 16 ; first two bytes of string |
|
||||||
scanalign = 20 ; dword-misalignment of string |
|
||||||
nicematch = 24 ; a good enough match size |
|
||||||
bestlen = 28 ; size of best match so far |
|
||||||
scan = 32 ; ptr to string wanting match |
|
||||||
varsize = 36 ; number of bytes (also offset to last saved register) |
|
||||||
|
|
||||||
; Saved Registers (actually pushed into place) |
|
||||||
ebx_save = 36 |
|
||||||
edi_save = 40 |
|
||||||
esi_save = 44 |
|
||||||
ebp_save = 48 |
|
||||||
|
|
||||||
; Parameters |
|
||||||
retaddr = 52 |
|
||||||
deflatestate = 56 |
|
||||||
curmatch = 60 |
|
||||||
|
|
||||||
; Save registers that the compiler may be using |
|
||||||
push ebp |
|
||||||
push edi |
|
||||||
push esi |
|
||||||
push ebx |
|
||||||
|
|
||||||
; Allocate local variable space |
|
||||||
sub esp,varsize |
|
||||||
|
|
||||||
; Retrieve the function arguments. ecx will hold cur_match |
|
||||||
; throughout the entire function. edx will hold the pointer to the |
|
||||||
; deflate_state structure during the function's setup (before |
|
||||||
; entering the main loop). |
|
||||||
|
|
||||||
mov edx, [esp+deflatestate] |
|
||||||
ASSUME edx:PTR DEFLATE_STATE |
|
||||||
|
|
||||||
mov ecx, [esp+curmatch] |
|
||||||
|
|
||||||
; uInt wmask = s->w_mask; |
|
||||||
; unsigned chain_length = s->max_chain_length; |
|
||||||
; if (s->prev_length >= s->good_match) { |
|
||||||
; chain_length >>= 2; |
|
||||||
; } |
|
||||||
|
|
||||||
mov eax, [edx].ds_prev_length |
|
||||||
mov ebx, [edx].ds_good_match |
|
||||||
cmp eax, ebx |
|
||||||
mov eax, [edx].ds_w_mask |
|
||||||
mov ebx, [edx].ds_max_chain_length |
|
||||||
jl SHORT LastMatchGood |
|
||||||
shr ebx, 2 |
|
||||||
LastMatchGood: |
|
||||||
|
|
||||||
; chainlen is decremented once beforehand so that the function can |
|
||||||
; use the sign flag instead of the zero flag for the exit test. |
|
||||||
; It is then shifted into the high word, to make room for the wmask |
|
||||||
; value, which it will always accompany. |
|
||||||
|
|
||||||
dec ebx |
|
||||||
shl ebx, 16 |
|
||||||
or ebx, eax |
|
||||||
mov [esp+chainlenwmask], ebx |
|
||||||
|
|
||||||
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
|
||||||
|
|
||||||
mov eax, [edx].ds_nice_match |
|
||||||
mov ebx, [edx].ds_lookahead |
|
||||||
cmp ebx, eax |
|
||||||
jl SHORT LookaheadLess |
|
||||||
mov ebx, eax |
|
||||||
LookaheadLess: |
|
||||||
mov [esp+nicematch], ebx |
|
||||||
|
|
||||||
;/* register Bytef *scan = s->window + s->strstart; */ |
|
||||||
|
|
||||||
mov esi, [edx].ds_window |
|
||||||
mov [esp+window], esi |
|
||||||
mov ebp, [edx].ds_strstart |
|
||||||
lea edi, [esi+ebp] |
|
||||||
mov [esp+scan],edi |
|
||||||
|
|
||||||
;/* Determine how many bytes the scan ptr is off from being */ |
|
||||||
;/* dword-aligned. */ |
|
||||||
|
|
||||||
mov eax, edi |
|
||||||
neg eax |
|
||||||
and eax, 3 |
|
||||||
mov [esp+scanalign], eax |
|
||||||
|
|
||||||
;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
|
||||||
;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
|
||||||
|
|
||||||
mov eax, [edx].ds_w_size |
|
||||||
sub eax, MIN_LOOKAHEAD |
|
||||||
sub ebp, eax |
|
||||||
jg SHORT LimitPositive |
|
||||||
xor ebp, ebp |
|
||||||
LimitPositive: |
|
||||||
|
|
||||||
;/* int best_len = s->prev_length; */ |
|
||||||
|
|
||||||
mov eax, [edx].ds_prev_length |
|
||||||
mov [esp+bestlen], eax |
|
||||||
|
|
||||||
;/* Store the sum of s->window + best_len locally (in windowbestlen), and in esi. */ |
|
||||||
|
|
||||||
add esi, eax |
|
||||||
mov [esp+windowbestlen], esi |
|
||||||
|
|
||||||
;/* register ush scan_start = *(ushf*)scan; */ |
|
||||||
;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
|
||||||
;/* Posf *prev = s->prev; */ |
|
||||||
|
|
||||||
movzx ebx, WORD PTR[edi] |
|
||||||
mov [esp+scanstart], ebx |
|
||||||
movzx ebx, WORD PTR[eax+edi-1] |
|
||||||
mov [esp+scanend], ebx |
|
||||||
mov edi, [edx].ds_prev |
|
||||||
|
|
||||||
;/* Jump into the main loop. */ |
|
||||||
|
|
||||||
mov edx, [esp+chainlenwmask] |
|
||||||
jmp SHORT LoopEntry |
|
||||||
|
|
||||||
;/* do { |
|
||||||
; * match = s->window + cur_match; |
|
||||||
; * if (*(ushf*)(match+best_len-1) != scan_end || |
|
||||||
; * *(ushf*)match != scan_start) continue; |
|
||||||
; * [...] |
|
||||||
; * } while ((cur_match = prev[cur_match & wmask]) > limit |
|
||||||
; * && --chain_length != 0); |
|
||||||
; * |
|
||||||
; * Here is the inner loop of the function. The function will spend the |
|
||||||
; * majority of its time in this loop, and majority of that time will |
|
||||||
; * be spent in the first ten instructions. |
|
||||||
; * |
|
||||||
; * Within this loop: |
|
||||||
; * %ebx = scanend |
|
||||||
; * %ecx = curmatch |
|
||||||
; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) |
|
||||||
; * %esi = windowbestlen - i.e., (window + bestlen) |
|
||||||
; * %edi = prev |
|
||||||
; * %ebp = limit |
|
||||||
; */ |
|
||||||
|
|
||||||
ALIGN 4 |
|
||||||
LookupLoop: |
|
||||||
and ecx, edx |
|
||||||
movzx ecx, WORD PTR[edi+ecx*2] |
|
||||||
cmp ecx, ebp |
|
||||||
jbe LeaveNow |
|
||||||
sub edx, 000010000H |
|
||||||
js LeaveNow |
|
||||||
|
|
||||||
LoopEntry: |
|
||||||
movzx eax, WORD PTR[esi+ecx-1] |
|
||||||
cmp eax, ebx |
|
||||||
jnz SHORT LookupLoop |
|
||||||
|
|
||||||
mov eax, [esp+window] |
|
||||||
movzx eax, WORD PTR[eax+ecx] |
|
||||||
cmp eax, [esp+scanstart] |
|
||||||
jnz SHORT LookupLoop |
|
||||||
|
|
||||||
;/* Store the current value of chainlen. */ |
|
||||||
|
|
||||||
mov [esp+chainlenwmask], edx |
|
||||||
|
|
||||||
;/* Point %edi to the string under scrutiny, and %esi to the string we */ |
|
||||||
;/* are hoping to match it up with. In actuality, %esi and %edi are */ |
|
||||||
;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ |
|
||||||
;/* initialized to -(MAX_MATCH_8 - scanalign). */ |
|
||||||
|
|
||||||
mov esi, [esp+window] |
|
||||||
mov edi, [esp+scan] |
|
||||||
add esi, ecx |
|
||||||
mov eax, [esp+scanalign] |
|
||||||
mov edx, -MAX_MATCH_8 |
|
||||||
lea edi, [edi+eax+MAX_MATCH_8] |
|
||||||
lea esi, [esi+eax+MAX_MATCH_8] |
|
||||||
|
|
||||||
;/* Test the strings for equality, 8 bytes at a time. At the end, |
|
||||||
; * adjust %edx so that it is offset to the exact byte that mismatched. |
|
||||||
; * |
|
||||||
; * We already know at this point that the first three bytes of the |
|
||||||
; * strings match each other, and they can be safely passed over before |
|
||||||
; * starting the compare loop. So what this code does is skip over 0-3 |
|
||||||
; * bytes, as much as necessary in order to dword-align the %edi |
|
||||||
; * pointer. (%esi will still be misaligned three times out of four.) |
|
||||||
; * |
|
||||||
; * It should be confessed that this loop usually does not represent |
|
||||||
; * much of the total running time. Replacing it with a more |
|
||||||
; * straightforward "rep cmpsb" would not drastically degrade |
|
||||||
; * performance. |
|
||||||
; */ |
|
||||||
|
|
||||||
LoopCmps: |
|
||||||
mov eax, DWORD PTR[esi+edx] |
|
||||||
xor eax, DWORD PTR[edi+edx] |
|
||||||
jnz SHORT LeaveLoopCmps |
|
||||||
|
|
||||||
mov eax, DWORD PTR[esi+edx+4] |
|
||||||
xor eax, DWORD PTR[edi+edx+4] |
|
||||||
jnz SHORT LeaveLoopCmps4 |
|
||||||
|
|
||||||
add edx, 8 |
|
||||||
jnz SHORT LoopCmps |
|
||||||
jmp LenMaximum |
|
||||||
ALIGN 4 |
|
||||||
|
|
||||||
LeaveLoopCmps4: |
|
||||||
add edx, 4 |
|
||||||
|
|
||||||
LeaveLoopCmps: |
|
||||||
test eax, 00000FFFFH |
|
||||||
jnz SHORT LenLower |
|
||||||
|
|
||||||
add edx, 2 |
|
||||||
shr eax, 16 |
|
||||||
|
|
||||||
LenLower: |
|
||||||
sub al, 1 |
|
||||||
adc edx, 0 |
|
||||||
|
|
||||||
;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ |
|
||||||
;/* then automatically accept it as the best possible match and leave. */ |
|
||||||
|
|
||||||
lea eax, [edi+edx] |
|
||||||
mov edi, [esp+scan] |
|
||||||
sub eax, edi |
|
||||||
cmp eax, MAX_MATCH |
|
||||||
jge SHORT LenMaximum |
|
||||||
|
|
||||||
;/* If the length of the match is not longer than the best match we */ |
|
||||||
;/* have so far, then forget it and return to the lookup loop. */ |
|
||||||
|
|
||||||
mov edx, [esp+deflatestate] |
|
||||||
mov ebx, [esp+bestlen] |
|
||||||
cmp eax, ebx |
|
||||||
jg SHORT LongerMatch |
|
||||||
mov esi, [esp+windowbestlen] |
|
||||||
mov edi, [edx].ds_prev |
|
||||||
mov ebx, [esp+scanend] |
|
||||||
mov edx, [esp+chainlenwmask] |
|
||||||
jmp LookupLoop |
|
||||||
ALIGN 4 |
|
||||||
|
|
||||||
;/* s->match_start = cur_match; */ |
|
||||||
;/* best_len = len; */ |
|
||||||
;/* if (len >= nice_match) break; */ |
|
||||||
;/* scan_end = *(ushf*)(scan+best_len-1); */ |
|
||||||
|
|
||||||
LongerMatch: |
|
||||||
mov ebx, [esp+nicematch] |
|
||||||
mov [esp+bestlen], eax |
|
||||||
mov [edx].ds_match_start, ecx |
|
||||||
cmp eax, ebx |
|
||||||
jge SHORT LeaveNow |
|
||||||
mov esi, [esp+window] |
|
||||||
add esi, eax |
|
||||||
mov [esp+windowbestlen], esi |
|
||||||
movzx ebx, WORD PTR[edi+eax-1] |
|
||||||
mov edi, [edx].ds_prev |
|
||||||
mov [esp+scanend], ebx |
|
||||||
mov edx, [esp+chainlenwmask] |
|
||||||
jmp LookupLoop |
|
||||||
ALIGN 4 |
|
||||||
|
|
||||||
;/* Accept the current string, with the maximum possible length. */ |
|
||||||
|
|
||||||
LenMaximum: |
|
||||||
mov edx, [esp+deflatestate] |
|
||||||
mov DWORD PTR[esp+bestlen], MAX_MATCH |
|
||||||
mov [edx].ds_match_start, ecx |
|
||||||
|
|
||||||
;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
|
||||||
;/* return s->lookahead; */ |
|
||||||
|
|
||||||
LeaveNow: |
|
||||||
mov edx, [esp+deflatestate] |
|
||||||
mov ebx, [esp+bestlen] |
|
||||||
mov eax, [edx].ds_lookahead |
|
||||||
cmp ebx, eax |
|
||||||
jg SHORT LookaheadRet |
|
||||||
mov eax, ebx |
|
||||||
LookaheadRet: |
|
||||||
|
|
||||||
; Restore the stack and return from whence we came. |
|
||||||
|
|
||||||
add esp, varsize |
|
||||||
pop ebx |
|
||||||
pop esi |
|
||||||
pop edi |
|
||||||
pop ebp |
|
||||||
ret |
|
||||||
|
|
||||||
_longest_match ENDP |
|
||||||
|
|
||||||
_TEXT ENDS |
|
||||||
END |
|
@ -1,2 +1,2 @@ |
|||||||
ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm |
ml /coff /Zi /c /Flmatch686.lst match686.asm |
||||||
ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
||||||
|
@ -1,972 +0,0 @@ |
|||||||
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 |
|
||||||
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. |
|
||||||
; File written by Gilles Vollant, by modifying the longest_match |
|
||||||
; from Jean-loup Gailly in deflate.c |
|
||||||
; |
|
||||||
; http://www.zlib.net |
|
||||||
; http://www.winimage.com/zLibDll |
|
||||||
; http://www.muppetlabs.com/~breadbox/software/assembly.html |
|
||||||
; |
|
||||||
; For Visual C++ 4.x and higher and ML 6.x and higher |
|
||||||
; ml.exe is in directory \MASM611C of Win95 DDK |
|
||||||
; ml.exe is also distributed in http://www.masm32.com/masmdl.htm |
|
||||||
; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ |
|
||||||
; |
|
||||||
; this file contains two implementations of longest_match |
|
||||||
; |
|
||||||
; longest_match_7fff : written 1996 by Gilles Vollant optimized for |
|
||||||
; first Pentium. Assume s->w_mask == 0x7fff |
|
||||||
; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro |
|
||||||
; |
|
||||||
; to use an assembly version of longest_match, you need to define ASMV in the project |
|
||||||
; There are two ways to use gvmat32.asm |
|
||||||
; |
|
||||||
; A) Suggested method |
|
||||||
; if you want include both longest_match_7fff and longest_match_686 |
|
||||||
; compile the asm file running |
|
||||||
; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm |
|
||||||
; and include gvmat32c.c in your project |
|
||||||
; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff, |
|
||||||
; longest_match_7fff will be used |
|
||||||
; if you have a more modern CPU (Pentium Pro, II and higher) |
|
||||||
; longest_match_686 will be used |
|
||||||
; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used, |
|
||||||
; but this is not a situation you'll find often |
|
||||||
; |
|
||||||
; B) Alternative |
|
||||||
; if you are not interested in old cpu performance and want the smallest |
|
||||||
; binaries possible |
|
||||||
; |
|
||||||
; compile the asm file running |
|
||||||
; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm |
|
||||||
; and do not include gvmat32c.c in your project (or also define |
|
||||||
; NOOLDPENTIUMCODE) |
|
||||||
; |
|
||||||
; note : as far as I know, longest_match_686 is much faster than longest_match_7fff |
|
||||||
; on Pentium Pro/II/III, faster (but less so) on P4, but it seems |
|
||||||
; longest_match_7fff can be faster (very, very little) on AMD Athlon64/K8 |
|
||||||
; |
|
||||||
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2 |
|
||||||
|
|
||||||
;uInt longest_match_7fff(s, cur_match) |
|
||||||
; deflate_state *s; |
|
||||||
; IPos cur_match; /* current match */ |
|
||||||
|
|
||||||
NbStack equ 76 |
|
||||||
cur_match equ dword ptr[esp+NbStack-0] |
|
||||||
str_s equ dword ptr[esp+NbStack-4] |
|
||||||
; 5 dword on top (ret,ebp,esi,edi,ebx) |
|
||||||
adrret equ dword ptr[esp+NbStack-8] |
|
||||||
pushebp equ dword ptr[esp+NbStack-12] |
|
||||||
pushedi equ dword ptr[esp+NbStack-16] |
|
||||||
pushesi equ dword ptr[esp+NbStack-20] |
|
||||||
pushebx equ dword ptr[esp+NbStack-24] |
|
||||||
|
|
||||||
chain_length equ dword ptr [esp+NbStack-28] |
|
||||||
limit equ dword ptr [esp+NbStack-32] |
|
||||||
best_len equ dword ptr [esp+NbStack-36] |
|
||||||
window equ dword ptr [esp+NbStack-40] |
|
||||||
prev equ dword ptr [esp+NbStack-44] |
|
||||||
scan_start equ word ptr [esp+NbStack-48] |
|
||||||
wmask equ dword ptr [esp+NbStack-52] |
|
||||||
match_start_ptr equ dword ptr [esp+NbStack-56] |
|
||||||
nice_match equ dword ptr [esp+NbStack-60] |
|
||||||
scan equ dword ptr [esp+NbStack-64] |
|
||||||
|
|
||||||
windowlen equ dword ptr [esp+NbStack-68] |
|
||||||
match_start equ dword ptr [esp+NbStack-72] |
|
||||||
strend equ dword ptr [esp+NbStack-76] |
|
||||||
NbStackAdd equ (NbStack-24) |
|
||||||
|
|
||||||
.386p |
|
||||||
|
|
||||||
name gvmatch |
|
||||||
.MODEL FLAT |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; all the +zlib1222add offsets are due to the addition of fields |
|
||||||
; in zlib in the deflate_state structure since the asm code was first written |
|
||||||
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). |
|
||||||
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). |
|
||||||
; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8"). |
|
||||||
|
|
||||||
zlib1222add equ 8 |
|
||||||
|
|
||||||
; Note : these values are valid for a structure packed on an 8-byte boundary |
|
||||||
dep_chain_length equ 74h+zlib1222add |
|
||||||
dep_window equ 30h+zlib1222add |
|
||||||
dep_strstart equ 64h+zlib1222add |
|
||||||
dep_prev_length equ 70h+zlib1222add |
|
||||||
dep_nice_match equ 88h+zlib1222add |
|
||||||
dep_w_size equ 24h+zlib1222add |
|
||||||
dep_prev equ 38h+zlib1222add |
|
||||||
dep_w_mask equ 2ch+zlib1222add |
|
||||||
dep_good_match equ 84h+zlib1222add |
|
||||||
dep_match_start equ 68h+zlib1222add |
|
||||||
dep_lookahead equ 6ch+zlib1222add |
|
||||||
|
|
||||||
|
|
||||||
_TEXT segment |
|
||||||
|
|
||||||
IFDEF NOUNDERLINE |
|
||||||
IFDEF NOOLDPENTIUMCODE |
|
||||||
public longest_match |
|
||||||
public match_init |
|
||||||
ELSE |
|
||||||
public longest_match_7fff |
|
||||||
public cpudetect32 |
|
||||||
public longest_match_686 |
|
||||||
ENDIF |
|
||||||
ELSE |
|
||||||
IFDEF NOOLDPENTIUMCODE |
|
||||||
public _longest_match |
|
||||||
public _match_init |
|
||||||
ELSE |
|
||||||
public _longest_match_7fff |
|
||||||
public _cpudetect32 |
|
||||||
public _longest_match_686 |
|
||||||
ENDIF |
|
||||||
ENDIF |
|
||||||
|
|
||||||
MAX_MATCH equ 258 |
|
||||||
MIN_MATCH equ 3 |
|
||||||
MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
IFNDEF NOOLDPENTIUMCODE |
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match_7fff proc near |
|
||||||
ELSE |
|
||||||
_longest_match_7fff proc near |
|
||||||
ENDIF |
|
||||||
|
|
||||||
mov edx,[esp+4] |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
push ebp |
|
||||||
push edi |
|
||||||
push esi |
|
||||||
push ebx |
|
||||||
|
|
||||||
sub esp,NbStackAdd |
|
||||||
|
|
||||||
; initialize or check the variables used in match.asm. |
|
||||||
mov ebp,edx |
|
||||||
|
|
||||||
; chain_length = s->max_chain_length |
|
||||||
; if (prev_length>=good_match) chain_length >>= 2 |
|
||||||
mov edx,[ebp+dep_chain_length] |
|
||||||
mov ebx,[ebp+dep_prev_length] |
|
||||||
cmp [ebp+dep_good_match],ebx |
|
||||||
ja noshr |
|
||||||
shr edx,2 |
|
||||||
noshr: |
|
||||||
; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop |
|
||||||
inc edx |
|
||||||
mov edi,[ebp+dep_nice_match] |
|
||||||
mov chain_length,edx |
|
||||||
mov eax,[ebp+dep_lookahead] |
|
||||||
cmp eax,edi |
|
||||||
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
|
||||||
jae nolookaheadnicematch |
|
||||||
mov edi,eax |
|
||||||
nolookaheadnicematch: |
|
||||||
; best_len = s->prev_length |
|
||||||
mov best_len,ebx |
|
||||||
|
|
||||||
; window = s->window |
|
||||||
mov esi,[ebp+dep_window] |
|
||||||
mov ecx,[ebp+dep_strstart] |
|
||||||
mov window,esi |
|
||||||
|
|
||||||
mov nice_match,edi |
|
||||||
; scan = window + strstart |
|
||||||
add esi,ecx |
|
||||||
mov scan,esi |
|
||||||
; dx = *scan |
|
||||||
mov dx,word ptr [esi] |
|
||||||
; bx = *(scan+best_len-1) |
|
||||||
mov bx,word ptr [esi+ebx-1] |
|
||||||
add esi,MAX_MATCH-1 |
|
||||||
; scan_start = *scan |
|
||||||
mov scan_start,dx |
|
||||||
; strend = scan + MAX_MATCH-1 |
|
||||||
mov strend,esi |
|
||||||
; bx = scan_end = *(scan+best_len-1) |
|
||||||
|
|
||||||
; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
|
||||||
; s->strstart - (IPos)MAX_DIST(s) : NIL; |
|
||||||
|
|
||||||
mov esi,[ebp+dep_w_size] |
|
||||||
sub esi,MIN_LOOKAHEAD |
|
||||||
; here esi = MAX_DIST(s) |
|
||||||
sub ecx,esi |
|
||||||
ja nodist |
|
||||||
xor ecx,ecx |
|
||||||
nodist: |
|
||||||
mov limit,ecx |
|
||||||
|
|
||||||
; prev = s->prev |
|
||||||
mov edx,[ebp+dep_prev] |
|
||||||
mov prev,edx |
|
||||||
|
|
||||||
; |
|
||||||
mov edx,dword ptr [ebp+dep_match_start] |
|
||||||
mov bp,scan_start |
|
||||||
mov eax,cur_match |
|
||||||
mov match_start,edx |
|
||||||
|
|
||||||
mov edx,window |
|
||||||
mov edi,edx |
|
||||||
add edi,best_len |
|
||||||
mov esi,prev |
|
||||||
dec edi |
|
||||||
; windowlen = window + best_len -1 |
|
||||||
mov windowlen,edi |
|
||||||
|
|
||||||
jmp beginloop2 |
|
||||||
align 4 |
|
||||||
|
|
||||||
; here, in the loop |
|
||||||
; eax = ax = cur_match |
|
||||||
; ecx = limit |
|
||||||
; bx = scan_end |
|
||||||
; bp = scan_start |
|
||||||
; edi = windowlen (window + best_len -1) |
|
||||||
; esi = prev |
|
||||||
|
|
||||||
|
|
||||||
;// here; chain_length <=16 |
|
||||||
normalbeg0add16: |
|
||||||
add chain_length,16 |
|
||||||
jz exitloop |
|
||||||
normalbeg0: |
|
||||||
cmp word ptr[edi+eax],bx |
|
||||||
je normalbeg2noroll |
|
||||||
rcontlabnoroll: |
|
||||||
; cur_match = prev[cur_match & wmask] |
|
||||||
and eax,7fffh |
|
||||||
mov ax,word ptr[esi+eax*2] |
|
||||||
; if cur_match <= limit, go to exitloop |
|
||||||
cmp ecx,eax |
|
||||||
jnb exitloop |
|
||||||
; if --chain_length != 0, loop back to normalbeg0; otherwise go to exitloop |
|
||||||
dec chain_length |
|
||||||
jnz normalbeg0 |
|
||||||
jmp exitloop |
|
||||||
|
|
||||||
normalbeg2noroll: |
|
||||||
; if (scan_start==*(cur_match+window)) goto normalbeg2 |
|
||||||
cmp bp,word ptr[edx+eax] |
|
||||||
jne rcontlabnoroll |
|
||||||
jmp normalbeg2 |
|
||||||
|
|
||||||
contloop3: |
|
||||||
mov edi,windowlen |
|
||||||
|
|
||||||
; cur_match = prev[cur_match & wmask] |
|
||||||
and eax,7fffh |
|
||||||
mov ax,word ptr[esi+eax*2] |
|
||||||
; if cur_match <= limit, go to exitloop |
|
||||||
cmp ecx,eax |
|
||||||
jnbexitloopshort1: |
|
||||||
jnb exitloop |
|
||||||
; if --chain_length != 0, go to exitloop |
|
||||||
|
|
||||||
|
|
||||||
; begin the main loop |
|
||||||
beginloop2: |
|
||||||
sub chain_length,16+1 |
|
||||||
; if chain_length <=16, don't use the unrolled loop |
|
||||||
jna normalbeg0add16 |
|
||||||
|
|
||||||
do16: |
|
||||||
cmp word ptr[edi+eax],bx |
|
||||||
je normalbeg2dc0 |
|
||||||
|
|
||||||
maccn MACRO lab |
|
||||||
and eax,7fffh |
|
||||||
mov ax,word ptr[esi+eax*2] |
|
||||||
cmp ecx,eax |
|
||||||
jnb exitloop |
|
||||||
cmp word ptr[edi+eax],bx |
|
||||||
je lab |
|
||||||
ENDM |
|
||||||
|
|
||||||
rcontloop0: |
|
||||||
maccn normalbeg2dc1 |
|
||||||
|
|
||||||
rcontloop1: |
|
||||||
maccn normalbeg2dc2 |
|
||||||
|
|
||||||
rcontloop2: |
|
||||||
maccn normalbeg2dc3 |
|
||||||
|
|
||||||
rcontloop3: |
|
||||||
maccn normalbeg2dc4 |
|
||||||
|
|
||||||
rcontloop4: |
|
||||||
maccn normalbeg2dc5 |
|
||||||
|
|
||||||
rcontloop5: |
|
||||||
maccn normalbeg2dc6 |
|
||||||
|
|
||||||
rcontloop6: |
|
||||||
maccn normalbeg2dc7 |
|
||||||
|
|
||||||
rcontloop7: |
|
||||||
maccn normalbeg2dc8 |
|
||||||
|
|
||||||
rcontloop8: |
|
||||||
maccn normalbeg2dc9 |
|
||||||
|
|
||||||
rcontloop9: |
|
||||||
maccn normalbeg2dc10 |
|
||||||
|
|
||||||
rcontloop10: |
|
||||||
maccn short normalbeg2dc11 |
|
||||||
|
|
||||||
rcontloop11: |
|
||||||
maccn short normalbeg2dc12 |
|
||||||
|
|
||||||
rcontloop12: |
|
||||||
maccn short normalbeg2dc13 |
|
||||||
|
|
||||||
rcontloop13: |
|
||||||
maccn short normalbeg2dc14 |
|
||||||
|
|
||||||
rcontloop14: |
|
||||||
maccn short normalbeg2dc15 |
|
||||||
|
|
||||||
rcontloop15: |
|
||||||
and eax,7fffh |
|
||||||
mov ax,word ptr[esi+eax*2] |
|
||||||
cmp ecx,eax |
|
||||||
jnb exitloop |
|
||||||
|
|
||||||
sub chain_length,16 |
|
||||||
ja do16 |
|
||||||
jmp normalbeg0add16 |
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|
||||||
|
|
||||||
; normbeg rcontlab,valsub -- landing pad for step `valsub` of the unrolled loop.
;   Reached when the word compare in the unrolled step succeeded. Compares bp
;   with the word at [edx+eax] (edx is reloaded from `window` before the big
;   loop). On mismatch it resumes the unrolled loop at `rcontlab`; on match it
;   adds 16-valsub to chain_length (the unrolled loop subtracts 16 per pass)
;   and jumps to iseq for the full string compare.
normbeg MACRO rcontlab,valsub |
|
||||||
; if we are here, we know that *(match+best_len-1) == scan_end |
|
||||||
cmp bp,word ptr[edx+eax] |
|
||||||
; if (*match != scan_start) goto rcontlab |
|
||||||
jne rcontlab |
|
||||||
; compute the correct chain_length, then compare the scan and match strings |
|
||||||
add chain_length,16-valsub |
|
||||||
jmp iseq |
|
||||||
ENDM |
|
||||||
|
|
||||||
|
|
||||||
normalbeg2dc11: |
|
||||||
normbeg rcontloop11,11 |
|
||||||
|
|
||||||
normalbeg2dc12: |
|
||||||
normbeg short rcontloop12,12 |
|
||||||
|
|
||||||
normalbeg2dc13: |
|
||||||
normbeg short rcontloop13,13 |
|
||||||
|
|
||||||
normalbeg2dc14: |
|
||||||
normbeg short rcontloop14,14 |
|
||||||
|
|
||||||
normalbeg2dc15: |
|
||||||
normbeg short rcontloop15,15 |
|
||||||
|
|
||||||
normalbeg2dc10: |
|
||||||
normbeg rcontloop10,10 |
|
||||||
|
|
||||||
normalbeg2dc9: |
|
||||||
normbeg rcontloop9,9 |
|
||||||
|
|
||||||
normalbeg2dc8: |
|
||||||
normbeg rcontloop8,8 |
|
||||||
|
|
||||||
normalbeg2dc7: |
|
||||||
normbeg rcontloop7,7 |
|
||||||
|
|
||||||
normalbeg2dc6: |
|
||||||
normbeg rcontloop6,6 |
|
||||||
|
|
||||||
normalbeg2dc5: |
|
||||||
normbeg rcontloop5,5 |
|
||||||
|
|
||||||
normalbeg2dc4: |
|
||||||
normbeg rcontloop4,4 |
|
||||||
|
|
||||||
normalbeg2dc3: |
|
||||||
normbeg rcontloop3,3 |
|
||||||
|
|
||||||
normalbeg2dc2: |
|
||||||
normbeg rcontloop2,2 |
|
||||||
|
|
||||||
normalbeg2dc1: |
|
||||||
normbeg rcontloop1,1 |
|
||||||
|
|
||||||
normalbeg2dc0: |
|
||||||
normbeg rcontloop0,0 |
|
||||||
|
|
||||||
|
|
||||||
; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end |
|
||||||
|
|
||||||
normalbeg2: |
|
||||||
mov edi,window |
|
||||||
|
|
||||||
cmp bp,word ptr[edi+eax] |
|
||||||
jne contloop3 ; if *(ushf*)match != scan_start, continue |
|
||||||
|
|
||||||
iseq: |
|
||||||
; if we are here, we know that *(match+best_len-1) == scan_end |
|
||||||
; and (match == scan_start) |
|
||||||
|
|
||||||
mov edi,edx |
|
||||||
mov esi,scan ; esi = scan |
|
||||||
add edi,eax ; edi = window + cur_match = match |
|
||||||
|
|
||||||
mov edx,[esi+3] ; compare manually dword at match+3 |
|
||||||
xor edx,[edi+3] ; and scan +3 |
|
||||||
|
|
||||||
jz begincompare ; if equal, go to long compare |
|
||||||
|
|
||||||
; we will determine the unmatch byte and calculate len (in esi) |
|
||||||
or dl,dl |
|
||||||
je eq1rr |
|
||||||
mov esi,3 |
|
||||||
jmp trfinval |
|
||||||
eq1rr: |
|
||||||
or dx,dx |
|
||||||
je eq1 |
|
||||||
|
|
||||||
mov esi,4 |
|
||||||
jmp trfinval |
|
||||||
eq1: |
|
||||||
and edx,0ffffffh |
|
||||||
jz eq11 |
|
||||||
mov esi,5 |
|
||||||
jmp trfinval |
|
||||||
eq11: |
|
||||||
mov esi,6 |
|
||||||
jmp trfinval |
|
||||||
|
|
||||||
begincompare: |
|
||||||
; here we now scan and match begin same |
|
||||||
add edi,6 |
|
||||||
add esi,6 |
|
||||||
mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes |
|
||||||
repe cmpsd ; loop until mismatch |
|
||||||
|
|
||||||
je trfin ; go to trfin if not unmatch |
|
||||||
; we determine the unmatch byte |
|
||||||
sub esi,4 |
|
||||||
mov edx,[edi-4] |
|
||||||
xor edx,[esi] |
|
||||||
|
|
||||||
or dl,dl |
|
||||||
jnz trfin |
|
||||||
inc esi |
|
||||||
|
|
||||||
or dx,dx |
|
||||||
jnz trfin |
|
||||||
inc esi |
|
||||||
|
|
||||||
and edx,0ffffffh |
|
||||||
jnz trfin |
|
||||||
inc esi |
|
||||||
|
|
||||||
trfin: |
|
||||||
sub esi,scan ; esi = len |
|
||||||
trfinval: |
|
||||||
; here we have finished the compare, and esi contains the length of the matching string |
|
||||||
cmp esi,best_len ; if len > best_len, go newbestlen |
|
||||||
ja short newbestlen |
|
||||||
; now we restore edx, ecx and esi, for the big loop |
|
||||||
mov esi,prev |
|
||||||
mov ecx,limit |
|
||||||
mov edx,window |
|
||||||
jmp contloop3 |
|
||||||
|
|
||||||
newbestlen: |
|
||||||
mov best_len,esi ; len become best_len |
|
||||||
|
|
||||||
mov match_start,eax ; save new position as match_start |
|
||||||
cmp esi,nice_match ; if best_len >= nice_match, exit |
|
||||||
jae exitloop |
|
||||||
mov ecx,scan |
|
||||||
mov edx,window ; restore edx=window |
|
||||||
add ecx,esi |
|
||||||
add esi,edx |
|
||||||
|
|
||||||
dec esi |
|
||||||
mov windowlen,esi ; windowlen = window + best_len-1 |
|
||||||
mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end |
|
||||||
|
|
||||||
; now we restore ecx and esi, for the big loop : |
|
||||||
mov esi,prev |
|
||||||
mov ecx,limit |
|
||||||
jmp contloop3 |
|
||||||
|
|
||||||
exitloop: |
|
||||||
; exit : s->match_start=match_start |
|
||||||
mov ebx,match_start |
|
||||||
mov ebp,str_s |
|
||||||
mov ecx,best_len |
|
||||||
mov dword ptr [ebp+dep_match_start],ebx |
|
||||||
mov eax,dword ptr [ebp+dep_lookahead] |
|
||||||
cmp ecx,eax |
|
||||||
ja minexlo |
|
||||||
mov eax,ecx |
|
||||||
minexlo: |
|
||||||
; return min(best_len,s->lookahead) |
|
||||||
|
|
||||||
; restore stack and register ebx,esi,edi,ebp |
|
||||||
add esp,NbStackAdd |
|
||||||
|
|
||||||
pop ebx |
|
||||||
pop esi |
|
||||||
pop edi |
|
||||||
pop ebp |
|
||||||
ret |
|
||||||
InfoAuthor: |
|
||||||
; please don't remove this string ! |
|
||||||
; You are free to use gvmat32 in any free or commercial app, provided you don't remove the string from the binary! |
|
||||||
db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match_7fff endp |
|
||||||
ELSE |
|
||||||
_longest_match_7fff endp |
|
||||||
ENDIF |
|
||||||
|
|
||||||
|
|
||||||
; cpudetect32 -- classify the CPU; takes no arguments, result in eax:
;   0300h  if the AC bit (40000h) of EFLAGS cannot be toggled (treated as 80386)
;   0400h  if the ID bit (200000h) of EFLAGS cannot be toggled (old 486, no CPUID)
;   otherwise whatever CPUID leaves in eax when called with eax = 1
; ebx is saved and restored around the body; ecx is used as scratch.
; NOTE(review): CPUID also writes ecx and edx, which are not preserved here --
; presumably acceptable under the caller's calling convention; confirm.
IFDEF NOUNDERLINE |
|
||||||
cpudetect32 proc near |
|
||||||
ELSE |
|
||||||
_cpudetect32 proc near |
|
||||||
ENDIF |
|
||||||
|
|
||||||
push ebx |
|
||||||
|
|
||||||
pushfd ; push original EFLAGS |
|
||||||
pop eax ; get original EFLAGS |
|
||||||
mov ecx, eax ; save original EFLAGS |
|
||||||
xor eax, 40000h ; flip AC bit in EFLAGS |
|
||||||
push eax ; save new EFLAGS value on stack |
|
||||||
popfd ; replace current EFLAGS value |
|
||||||
pushfd ; get new EFLAGS |
|
||||||
pop eax ; store new EFLAGS in EAX |
|
||||||
xor eax, ecx ; zero if the AC bit could not be toggled: processor=80386 |
|
||||||
jz end_cpu_is_386 ; jump if 80386 processor |
|
||||||
push ecx |
|
||||||
popfd ; restore AC bit in EFLAGS first |
|
||||||
|
|
||||||
; The first pushfd leaves a copy of the original EFLAGS on the stack for the
; final popfd below; the second one is popped into ecx as the reference value.
pushfd |
|
||||||
pushfd |
|
||||||
pop ecx |
|
||||||
|
|
||||||
mov eax, ecx ; get original EFLAGS |
|
||||||
xor eax, 200000h ; flip ID bit in EFLAGS |
|
||||||
push eax ; save new EFLAGS value on stack |
|
||||||
popfd ; replace current EFLAGS value |
|
||||||
pushfd ; get new EFLAGS |
|
||||||
pop eax ; store new EFLAGS in EAX |
|
||||||
popfd ; restore original EFLAGS |
|
||||||
xor eax, ecx ; zero if the ID bit could not be toggled: |
|
||||||
je is_old_486 ; processor=old 486 without CPUID |
|
||||||
|
|
||||||
; CPUID is emitted as raw opcode bytes (0Fh 0A2h); eax = 1 selects the leaf.
mov eax,1 |
|
||||||
db 0fh,0a2h ;CPUID |
|
||||||
|
|
||||||
; common exit: eax already holds the result
exitcpudetect: |
|
||||||
pop ebx |
|
||||||
ret |
|
||||||
|
|
||||||
end_cpu_is_386: |
|
||||||
mov eax,0300h |
|
||||||
jmp exitcpudetect |
|
||||||
|
|
||||||
is_old_486: |
|
||||||
mov eax,0400h |
|
||||||
jmp exitcpudetect |
|
||||||
|
|
||||||
IFDEF NOUNDERLINE |
|
||||||
cpudetect32 endp |
|
||||||
ELSE |
|
||||||
_cpudetect32 endp |
|
||||||
ENDIF |
|
||||||
ENDIF |
|
||||||
|
|
||||||
;;; Match-length constants used by longest_match below.
MAX_MATCH equ 258 |
|
||||||
MIN_MATCH equ 3 |
|
||||||
MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) |
|
||||||
;;; MAX_MATCH rounded up to a multiple of 8: (258 + 7) AND 0FFF8h = 264 = 0108h,
;;; the value hard-coded in the compare loop (0108h / 0fffffef8h). The previous
;;; mask 0FFF0h rounded to a multiple of 16 and produced 256, which disagreed
;;; with those literals.
MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h) |
|
||||||
|
|
||||||
|
|
||||||
;;; stack frame offsets |
;;; All offsets are relative to esp after the prologue of longest_match
;;; (push ebp, edi, esi, ebx; sub esp, LocalVarsSize).
|
||||||
|
|
||||||
chainlenwmask equ esp + 0 ; high word: current chain len |
|
||||||
; low word: s->wmask |
|
||||||
window equ esp + 4 ; local copy of s->window |
|
||||||
windowbestlen equ esp + 8 ; s->window + bestlen |
|
||||||
scanstart equ esp + 16 ; first two bytes of string |
|
||||||
scanend equ esp + 12 ; two bytes of string at scan + bestlen - 1 |
|
||||||
scanalign equ esp + 20 ; dword-misalignment of string |
|
||||||
nicematch equ esp + 24 ; a good enough match size |
|
||||||
bestlen equ esp + 28 ; size of best match so far |
|
||||||
scan equ esp + 32 ; ptr to string wanting match |
|
||||||
|
|
||||||
LocalVarsSize equ 36 |
|
||||||
; saved ebx at esp + 36 |
|
||||||
; saved esi at esp + 40 (pushed after edi, so it sits below it) |
|
||||||
; saved edi at esp + 44 |
|
||||||
; saved ebp at esp + 48 |
|
||||||
; return address at esp + 52 |
|
||||||
deflatestate equ esp + 56 ; the function arguments |
|
||||||
curmatch equ esp + 60 |
|
||||||
|
|
||||||
;;; Offsets for fields in the deflate_state structure. These numbers |
|
||||||
;;; are calculated from the definition of deflate_state, with the |
|
||||||
;;; assumption that the compiler will dword-align the fields. (Thus, |
|
||||||
;;; changing the definition of deflate_state could easily cause this |
|
||||||
;;; program to crash horribly, without so much as a warning at |
|
||||||
;;; compile time. Sigh.) |
;;; Every offset is biased by zlib1222add, which is defined elsewhere in this
;;; file. The field names in the comments below are the ones used by the
;;; C pseudo-code comments in longest_match.
|
||||||
|
|
||||||
; s->w_size
dsWSize equ 36+zlib1222add |
|
||||||
; s->w_mask
dsWMask equ 44+zlib1222add |
|
||||||
; s->window
dsWindow equ 48+zlib1222add |
|
||||||
; s->prev
dsPrev equ 56+zlib1222add |
|
||||||
; not referenced by the longest_match code below
dsMatchLen equ 88+zlib1222add |
|
||||||
; not referenced by the longest_match code below
dsPrevMatch equ 92+zlib1222add |
|
||||||
; s->strstart
dsStrStart equ 100+zlib1222add |
|
||||||
; s->match_start
dsMatchStart equ 104+zlib1222add |
|
||||||
; s->lookahead
dsLookahead equ 108+zlib1222add |
|
||||||
; s->prev_length
dsPrevLen equ 112+zlib1222add |
|
||||||
; s->max_chain_length
dsMaxChainLen equ 116+zlib1222add |
|
||||||
; s->good_match
dsGoodMatch equ 132+zlib1222add |
|
||||||
; nice_match
dsNiceMatch equ 136+zlib1222add |
|
||||||
|
|
||||||
|
|
||||||
;;; match.asm -- Pentium-Pro-optimized version of longest_match() |
|
||||||
;;; Written for zlib 1.1.2 |
|
||||||
;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
|
||||||
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html |
|
||||||
;;; |
|
||||||
;;; This is free software; you can redistribute it and/or modify it |
|
||||||
;;; under the terms of the GNU General Public License. |
|
||||||
|
|
||||||
;GLOBAL _longest_match, _match_init |
|
||||||
|
|
||||||
|
|
||||||
;SECTION .text |
|
||||||
|
|
||||||
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
|
||||||
|
|
||||||
;_longest_match: |
|
||||||
;;; longest_match / longest_match_686
;;;   Arguments (on the stack): deflatestate = pointer to the deflate_state,
;;;   curmatch = first candidate position in the hash chain.
;;;   Returns in eax the smaller of the best match length found and
;;;   s->lookahead; s->match_start is updated whenever a longer match is found.
;;;   ebp, edi, esi and ebx are saved and restored; eax, ecx and edx are not.
;;;   When NOOLDPENTIUMCODE is defined the procedure is assembled under the name
;;;   longest_match (plus an empty match_init); otherwise it is longest_match_686.
IFDEF NOOLDPENTIUMCODE |
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match proc near |
|
||||||
ELSE |
|
||||||
_longest_match proc near |
|
||||||
ENDIF |
|
||||||
ELSE |
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match_686 proc near |
|
||||||
ELSE |
|
||||||
_longest_match_686 proc near |
|
||||||
ENDIF |
|
||||||
ENDIF |
|
||||||
|
|
||||||
;;; Save registers that the compiler may be using, and adjust esp to |
|
||||||
;;; make room for our stack frame. |
|
||||||
|
|
||||||
push ebp |
|
||||||
push edi |
|
||||||
push esi |
|
||||||
push ebx |
|
||||||
sub esp, LocalVarsSize |
|
||||||
|
|
||||||
;;; Retrieve the function arguments. ecx will hold cur_match |
|
||||||
;;; throughout the entire function. edx will hold the pointer to the |
|
||||||
;;; deflate_state structure during the function's setup (before |
|
||||||
;;; entering the main loop). |
|
||||||
|
|
||||||
mov edx, [deflatestate] |
|
||||||
mov ecx, [curmatch] |
|
||||||
|
|
||||||
;;; uInt wmask = s->w_mask; |
|
||||||
;;; unsigned chain_length = s->max_chain_length; |
|
||||||
;;; if (s->prev_length >= s->good_match) { |
|
||||||
;;; chain_length >>= 2; |
|
||||||
;;; } |
|
||||||
|
|
||||||
mov eax, [edx + dsPrevLen] |
|
||||||
mov ebx, [edx + dsGoodMatch] |
|
||||||
cmp eax, ebx |
|
||||||
mov eax, [edx + dsWMask] |
|
||||||
mov ebx, [edx + dsMaxChainLen] |
|
||||||
;;; mov leaves the flags alone, so jl still tests the cmp above: the shift is
;;; skipped when prev_length < good_match (signed compare).
jl LastMatchGood |
|
||||||
shr ebx, 2 |
|
||||||
LastMatchGood: |
|
||||||
|
|
||||||
;;; chainlen is decremented once beforehand so that the function can |
|
||||||
;;; use the sign flag instead of the zero flag for the exit test. |
|
||||||
;;; It is then shifted into the high word, to make room for the wmask |
|
||||||
;;; value, which it will always accompany. |
|
||||||
|
|
||||||
dec ebx |
|
||||||
shl ebx, 16 |
|
||||||
or ebx, eax |
|
||||||
mov [chainlenwmask], ebx |
|
||||||
|
|
||||||
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
|
||||||
|
|
||||||
mov eax, [edx + dsNiceMatch] |
|
||||||
mov ebx, [edx + dsLookahead] |
|
||||||
cmp ebx, eax |
|
||||||
;;; keep ebx = lookahead when it is the smaller value (signed compare),
;;; otherwise replace it with nice_match
jl LookaheadLess |
|
||||||
mov ebx, eax |
|
||||||
LookaheadLess: mov [nicematch], ebx |
|
||||||
|
|
||||||
;;; register Bytef *scan = s->window + s->strstart; |
|
||||||
|
|
||||||
mov esi, [edx + dsWindow] |
|
||||||
mov [window], esi |
|
||||||
mov ebp, [edx + dsStrStart] |
|
||||||
lea edi, [esi + ebp] |
|
||||||
mov [scan], edi |
|
||||||
|
|
||||||
;;; Determine how many bytes the scan ptr is off from being |
|
||||||
;;; dword-aligned. |
|
||||||
|
|
||||||
mov eax, edi |
|
||||||
neg eax |
|
||||||
and eax, 3 |
|
||||||
mov [scanalign], eax |
|
||||||
|
|
||||||
;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
|
||||||
;;; s->strstart - (IPos)MAX_DIST(s) : NIL; |
|
||||||
|
|
||||||
;;; Here MAX_DIST(s) is computed as s->w_size - MIN_LOOKAHEAD; ebp (strstart)
;;; becomes limit, clamped to 0 when the difference is not positive.
mov eax, [edx + dsWSize] |
|
||||||
sub eax, MIN_LOOKAHEAD |
|
||||||
sub ebp, eax |
|
||||||
jg LimitPositive |
|
||||||
xor ebp, ebp |
|
||||||
LimitPositive: |
|
||||||
|
|
||||||
;;; int best_len = s->prev_length; |
|
||||||
|
|
||||||
mov eax, [edx + dsPrevLen] |
|
||||||
mov [bestlen], eax |
|
||||||
|
|
||||||
;;; Store the sum s->window + best_len both in esi and in the local |
;;; variable windowbestlen.
|
||||||
|
|
||||||
add esi, eax |
|
||||||
mov [windowbestlen], esi |
|
||||||
|
|
||||||
;;; register ush scan_start = *(ushf*)scan; |
|
||||||
;;; register ush scan_end = *(ushf*)(scan+best_len-1); |
|
||||||
;;; Posf *prev = s->prev; |
|
||||||
|
|
||||||
movzx ebx, word ptr [edi] |
|
||||||
mov [scanstart], ebx |
|
||||||
movzx ebx, word ptr [edi + eax - 1] |
|
||||||
mov [scanend], ebx |
|
||||||
mov edi, [edx + dsPrev] |
|
||||||
|
|
||||||
;;; Jump into the main loop. |
|
||||||
|
|
||||||
mov edx, [chainlenwmask] |
|
||||||
jmp short LoopEntry |
|
||||||
|
|
||||||
align 4 |
|
||||||
|
|
||||||
;;; do { |
|
||||||
;;; match = s->window + cur_match; |
|
||||||
;;; if (*(ushf*)(match+best_len-1) != scan_end || |
|
||||||
;;; *(ushf*)match != scan_start) continue; |
|
||||||
;;; [...] |
|
||||||
;;; } while ((cur_match = prev[cur_match & wmask]) > limit |
|
||||||
;;; && --chain_length != 0); |
|
||||||
;;; |
|
||||||
;;; Here is the inner loop of the function. The function will spend the |
|
||||||
;;; majority of its time in this loop, and majority of that time will |
|
||||||
;;; be spent in the first ten instructions. |
|
||||||
;;; |
|
||||||
;;; Within this loop: |
|
||||||
;;; ebx = scanend |
|
||||||
;;; ecx = curmatch |
|
||||||
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) |
|
||||||
;;; esi = windowbestlen - i.e., (window + bestlen) |
|
||||||
;;; edi = prev |
|
||||||
;;; ebp = limit |
|
||||||
|
|
||||||
LookupLoop: |
|
||||||
and ecx, edx |
|
||||||
movzx ecx, word ptr [edi + ecx*2] |
|
||||||
cmp ecx, ebp |
|
||||||
jbe LeaveNow |
|
||||||
;;; decrement the chain length kept in the high word of edx; leave once it
;;; goes negative
sub edx, 00010000h |
|
||||||
js LeaveNow |
|
||||||
LoopEntry: movzx eax, word ptr [esi + ecx - 1] |
|
||||||
cmp eax, ebx |
|
||||||
jnz LookupLoop |
|
||||||
mov eax, [window] |
|
||||||
movzx eax, word ptr [eax + ecx] |
|
||||||
cmp eax, [scanstart] |
|
||||||
jnz LookupLoop |
|
||||||
|
|
||||||
;;; Store the current value of chainlen. |
|
||||||
|
|
||||||
mov [chainlenwmask], edx |
|
||||||
|
|
||||||
;;; Point edi to the string under scrutiny, and esi to the string we |
|
||||||
;;; are hoping to match it up with. In actuality, esi and edi are |
|
||||||
;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is |
|
||||||
;;; initialized to -(MAX_MATCH_8 - scanalign). |
|
||||||
|
|
||||||
;;; The literals below are 0108h = 264 (MAX_MATCH rounded up to a multiple
;;; of 8) and 0fffffef8h = -264.
mov esi, [window] |
|
||||||
mov edi, [scan] |
|
||||||
add esi, ecx |
|
||||||
mov eax, [scanalign] |
|
||||||
mov edx, 0fffffef8h; -(MAX_MATCH_8) |
|
||||||
lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] |
|
||||||
lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] |
|
||||||
|
|
||||||
;;; Test the strings for equality, 8 bytes at a time. At the end, |
|
||||||
;;; adjust edx so that it is offset to the exact byte that mismatched. |
|
||||||
;;; |
|
||||||
;;; We already know at this point that the first three bytes of the |
|
||||||
;;; strings match each other, and they can be safely passed over before |
|
||||||
;;; starting the compare loop. So what this code does is skip over 0-3 |
|
||||||
;;; bytes, as much as necessary in order to dword-align the edi |
|
||||||
;;; pointer. (esi will still be misaligned three times out of four.) |
|
||||||
;;; |
|
||||||
;;; It should be confessed that this loop usually does not represent |
|
||||||
;;; much of the total running time. Replacing it with a more |
|
||||||
;;; straightforward "rep cmpsb" would not drastically degrade |
|
||||||
;;; performance. |
|
||||||
|
|
||||||
LoopCmps: |
|
||||||
mov eax, [esi + edx] |
|
||||||
xor eax, [edi + edx] |
|
||||||
jnz LeaveLoopCmps |
|
||||||
mov eax, [esi + edx + 4] |
|
||||||
xor eax, [edi + edx + 4] |
|
||||||
jnz LeaveLoopCmps4 |
|
||||||
add edx, 8 |
|
||||||
jnz LoopCmps |
|
||||||
jmp short LenMaximum |
|
||||||
;;; eax holds the XOR of the two differing dwords. If its low 16 bits are
;;; zero the mismatch is in the upper half: step edx by 2 and shift it down.
;;; Then "sub al, 1" sets carry only when al was 0 (that byte matched), and
;;; "adc edx, 0" adds that carry, leaving edx at the first mismatching byte.
LeaveLoopCmps4: add edx, 4 |
|
||||||
LeaveLoopCmps: test eax, 0000FFFFh |
|
||||||
jnz LenLower |
|
||||||
add edx, 2 |
|
||||||
shr eax, 16 |
|
||||||
LenLower: sub al, 1 |
|
||||||
adc edx, 0 |
|
||||||
|
|
||||||
;;; Calculate the length of the match. If it is MAX_MATCH or longer, |
|
||||||
;;; then automatically accept it as the best possible match and leave. |
|
||||||
|
|
||||||
lea eax, [edi + edx] |
|
||||||
mov edi, [scan] |
|
||||||
sub eax, edi |
|
||||||
cmp eax, MAX_MATCH |
|
||||||
jge LenMaximum |
|
||||||
|
|
||||||
;;; If the length of the match is not longer than the best match we |
|
||||||
;;; have so far, then forget it and return to the lookup loop. |
|
||||||
|
|
||||||
mov edx, [deflatestate] |
|
||||||
mov ebx, [bestlen] |
|
||||||
cmp eax, ebx |
|
||||||
jg LongerMatch |
|
||||||
;;; reload the loop registers (esi, edi, ebx, edx) that the compare clobbered
mov esi, [windowbestlen] |
|
||||||
mov edi, [edx + dsPrev] |
|
||||||
mov ebx, [scanend] |
|
||||||
mov edx, [chainlenwmask] |
|
||||||
jmp LookupLoop |
|
||||||
|
|
||||||
;;; s->match_start = cur_match; |
|
||||||
;;; best_len = len; |
|
||||||
;;; if (len >= nice_match) break; |
|
||||||
;;; scan_end = *(ushf*)(scan+best_len-1); |
|
||||||
|
|
||||||
LongerMatch: mov ebx, [nicematch] |
|
||||||
mov [bestlen], eax |
|
||||||
mov [edx + dsMatchStart], ecx |
|
||||||
cmp eax, ebx |
|
||||||
jge LeaveNow |
|
||||||
mov esi, [window] |
|
||||||
add esi, eax |
|
||||||
mov [windowbestlen], esi |
|
||||||
;;; edi still holds scan here (reloaded right after the compare loop)
movzx ebx, word ptr [edi + eax - 1] |
|
||||||
mov edi, [edx + dsPrev] |
|
||||||
mov [scanend], ebx |
|
||||||
mov edx, [chainlenwmask] |
|
||||||
jmp LookupLoop |
|
||||||
|
|
||||||
;;; Accept the current string, with the maximum possible length. |
;;; (Execution falls through into LeaveNow.)
|
||||||
|
|
||||||
LenMaximum: mov edx, [deflatestate] |
|
||||||
mov dword ptr [bestlen], MAX_MATCH |
|
||||||
mov [edx + dsMatchStart], ecx |
|
||||||
|
|
||||||
;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; |
|
||||||
;;; return s->lookahead; |
|
||||||
|
|
||||||
LeaveNow: |
|
||||||
mov edx, [deflatestate] |
|
||||||
mov ebx, [bestlen] |
|
||||||
mov eax, [edx + dsLookahead] |
|
||||||
cmp ebx, eax |
|
||||||
jg LookaheadRet |
|
||||||
mov eax, ebx |
|
||||||
LookaheadRet: |
|
||||||
|
|
||||||
;;; Restore the stack and return from whence we came. |
|
||||||
|
|
||||||
add esp, LocalVarsSize |
|
||||||
pop ebx |
|
||||||
pop esi |
|
||||||
pop edi |
|
||||||
pop ebp |
|
||||||
|
|
||||||
ret |
|
||||||
; The bytes below are data placed after the ret; they are never executed.
; please don't remove this string ! |
|
||||||
; You can freely use gvmat32 in any free or commercial app, provided you don't remove the string from the binary! |
|
||||||
db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah |
|
||||||
|
|
||||||
|
|
||||||
IFDEF NOOLDPENTIUMCODE |
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match endp |
|
||||||
ELSE |
|
||||||
_longest_match endp |
|
||||||
ENDIF |
|
||||||
|
|
||||||
; match_init is an empty procedure in this configuration: it only returns.
IFDEF NOUNDERLINE |
|
||||||
match_init proc near |
|
||||||
ret |
|
||||||
match_init endp |
|
||||||
ELSE |
|
||||||
_match_init proc near |
|
||||||
ret |
|
||||||
_match_init endp |
|
||||||
ENDIF |
|
||||||
ELSE |
|
||||||
IFDEF NOUNDERLINE |
|
||||||
longest_match_686 endp |
|
||||||
ELSE |
|
||||||
_longest_match_686 endp |
|
||||||
ENDIF |
|
||||||
ENDIF |
|
||||||
|
|
||||||
_TEXT ends |
|
||||||
end |
|
@ -1,62 +0,0 @@ |
|||||||
/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86
|
|
||||||
* Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. |
|
||||||
* File written by Gilles Vollant, by modifiying the longest_match |
|
||||||
* from Jean-loup Gailly in deflate.c |
|
||||||
* it prepares all parameters and calls an assembly longest_match routine: |
|
||||||
* longest_match_7fff is used only when wmask == 0x7fff on a pre-Pentium-Pro CPU, |
|
||||||
* otherwise longest_match_686 is used (assembly code is faster with a fixed wmask) |
|
||||||
* |
|
||||||
* Read comment at beginning of gvmat32.asm for more information |
|
||||||
*/ |
|
||||||
|
|
||||||
#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) |
|
||||||
#include "deflate.h" |
|
||||||
|
|
||||||
/* if your C compiler doesn't add an underscore before function names,
|
|
||||||
define ADD_UNDERLINE_ASMFUNC */ |
|
||||||
#ifdef ADD_UNDERLINE_ASMFUNC |
|
||||||
#define longest_match_7fff _longest_match_7fff |
|
||||||
#define longest_match_686 _longest_match_686 |
|
||||||
#define cpudetect32 _cpudetect32 |
|
||||||
#endif |
|
||||||
|
|
||||||
|
|
||||||
unsigned long cpudetect32(); |
|
||||||
|
|
||||||
uInt longest_match_c( |
|
||||||
deflate_state *s, |
|
||||||
IPos cur_match); /* current match */ |
|
||||||
|
|
||||||
|
|
||||||
uInt longest_match_7fff( |
|
||||||
deflate_state *s, |
|
||||||
IPos cur_match); /* current match */ |
|
||||||
|
|
||||||
uInt longest_match_686( |
|
||||||
deflate_state *s, |
|
||||||
IPos cur_match); /* current match */ |
|
||||||
|
|
||||||
|
|
||||||
static uInt iIsPPro=2; |
|
||||||
|
|
||||||
void match_init () |
|
||||||
{ |
|
||||||
iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0; |
|
||||||
} |
|
||||||
|
|
||||||
uInt longest_match( |
|
||||||
deflate_state *s, |
|
||||||
IPos cur_match) /* current match */ |
|
||||||
{ |
|
||||||
if (iIsPPro!=0) |
|
||||||
return longest_match_686(s,cur_match); |
|
||||||
|
|
||||||
if (s->w_mask != 0x7fff) |
|
||||||
return longest_match_686(s,cur_match); |
|
||||||
|
|
||||||
/* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */ |
|
||||||
return longest_match_7fff(s,cur_match); |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */ |
|
@ -0,0 +1,478 @@ |
|||||||
|
; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86 |
||||||
|
; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant. |
||||||
|
; File written by Gilles Vollant, by converting match686.S from Brian Raiter |
||||||
|
; for MASM. This is an assembly version of longest_match |
||||||
|
; from Jean-loup Gailly in deflate.c |
||||||
|
; |
||||||
|
; http://www.zlib.net |
||||||
|
; http://www.winimage.com/zLibDll |
||||||
|
; http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||||
|
; |
||||||
|
; For Visual C++ 4.x and higher and ML 6.x and higher |
||||||
|
; ml.exe is distributed in |
||||||
|
; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 |
||||||
|
; |
||||||
|
; this file contain two implementation of longest_match |
||||||
|
; |
||||||
|
; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro |
||||||
|
; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom) |
||||||
|
; |
||||||
|
; for using an assembly version of longest_match, you need define ASMV in project |
||||||
|
; |
||||||
|
; compile the asm file running |
||||||
|
; ml /coff /Zi /c /Flmatch686.lst match686.asm |
||||||
|
; and do not include match686.obj in your project |
||||||
|
; |
||||||
|
; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for |
||||||
|
; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor |
||||||
|
; with autoselect (with cpu detection code) |
||||||
|
; if you want to support the old Pentium optimization, you can still use those versions |
||||||
|
; |
||||||
|
; this file is not optimized for the old Pentium, but it is compatible with all 32-bit x86 |
||||||
|
; processor (starting 80386) |
||||||
|
; |
||||||
|
; |
||||||
|
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2 |
||||||
|
|
||||||
|
;uInt longest_match(s, cur_match) |
||||||
|
; deflate_state *s; |
||||||
|
; IPos cur_match; /* current match */ |
||||||
|
|
||||||
|
NbStack equ 76 |
||||||
|
cur_match equ dword ptr[esp+NbStack-0] |
||||||
|
str_s equ dword ptr[esp+NbStack-4] |
||||||
|
; 5 dword on top (ret,ebp,esi,edi,ebx) |
||||||
|
adrret equ dword ptr[esp+NbStack-8] |
||||||
|
pushebp equ dword ptr[esp+NbStack-12] |
||||||
|
pushedi equ dword ptr[esp+NbStack-16] |
||||||
|
pushesi equ dword ptr[esp+NbStack-20] |
||||||
|
pushebx equ dword ptr[esp+NbStack-24] |
||||||
|
|
||||||
|
chain_length equ dword ptr [esp+NbStack-28] |
||||||
|
limit equ dword ptr [esp+NbStack-32] |
||||||
|
best_len equ dword ptr [esp+NbStack-36] |
||||||
|
window equ dword ptr [esp+NbStack-40] |
||||||
|
prev equ dword ptr [esp+NbStack-44] |
||||||
|
scan_start equ word ptr [esp+NbStack-48] |
||||||
|
wmask equ dword ptr [esp+NbStack-52] |
||||||
|
match_start_ptr equ dword ptr [esp+NbStack-56] |
||||||
|
nice_match equ dword ptr [esp+NbStack-60] |
||||||
|
scan equ dword ptr [esp+NbStack-64] |
||||||
|
|
||||||
|
windowlen equ dword ptr [esp+NbStack-68] |
||||||
|
match_start equ dword ptr [esp+NbStack-72] |
||||||
|
strend equ dword ptr [esp+NbStack-76] |
||||||
|
NbStackAdd equ (NbStack-24) |
||||||
|
|
||||||
|
.386p |
||||||
|
|
||||||
|
name gvmatch |
||||||
|
.MODEL FLAT |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
; all the +zlib1222add offsets are due to the addition of fields |
||||||
|
; in zlib in the deflate_state structure since the asm code was first written |
||||||
|
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). |
||||||
|
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). |
||||||
|
; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). |
||||||
|
|
||||||
|
zlib1222add equ 8 |
||||||
|
|
||||||
|
; Note : these value are good with a 8 bytes boundary pack structure |
||||||
|
dep_chain_length equ 74h+zlib1222add |
||||||
|
dep_window equ 30h+zlib1222add |
||||||
|
dep_strstart equ 64h+zlib1222add |
||||||
|
dep_prev_length equ 70h+zlib1222add |
||||||
|
dep_nice_match equ 88h+zlib1222add |
||||||
|
dep_w_size equ 24h+zlib1222add |
||||||
|
dep_prev equ 38h+zlib1222add |
||||||
|
dep_w_mask equ 2ch+zlib1222add |
||||||
|
dep_good_match equ 84h+zlib1222add |
||||||
|
dep_match_start equ 68h+zlib1222add |
||||||
|
dep_lookahead equ 6ch+zlib1222add |
||||||
|
|
||||||
|
|
||||||
|
_TEXT segment |
||||||
|
|
||||||
|
IFDEF NOUNDERLINE |
||||||
|
public longest_match |
||||||
|
public match_init |
||||||
|
ELSE |
||||||
|
public _longest_match |
||||||
|
public _match_init |
||||||
|
ENDIF |
||||||
|
|
||||||
|
MAX_MATCH equ 258 |
||||||
|
MIN_MATCH equ 3 |
||||||
|
MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
MAX_MATCH equ 258 |
||||||
|
MIN_MATCH equ 3 |
||||||
|
MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) |
||||||
|
MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h) ; MAX_MATCH rounded up to a multiple of 8: 264 = 0108h, the literal used by the string-compare setup |
||||||
|
|
||||||
|
|
||||||
|
;;; stack frame offsets |
||||||
|
|
||||||
|
chainlenwmask equ esp + 0 ; high word: current chain len |
||||||
|
; low word: s->wmask |
||||||
|
window equ esp + 4 ; local copy of s->window |
||||||
|
windowbestlen equ esp + 8 ; s->window + bestlen |
||||||
|
scanstart equ esp + 16 ; first two bytes of string |
||||||
|
scanend equ esp + 12 ; last two bytes of string |
||||||
|
scanalign equ esp + 20 ; dword-misalignment of string |
||||||
|
nicematch equ esp + 24 ; a good enough match size |
||||||
|
bestlen equ esp + 28 ; size of best match so far |
||||||
|
scan equ esp + 32 ; ptr to string wanting match |
||||||
|
|
||||||
|
LocalVarsSize equ 36 |
||||||
|
; saved ebx byte esp + 36 |
||||||
|
; saved esi byte esp + 40 |
||||||
|
; saved edi byte esp + 44 |
||||||
|
; saved ebp byte esp + 48 |
||||||
|
; return address byte esp + 52 |
||||||
|
deflatestate equ esp + 56 ; the function arguments |
||||||
|
curmatch equ esp + 60 |
||||||
|
|
||||||
|
;;; Offsets for fields in the deflate_state structure. These numbers |
||||||
|
;;; are calculated from the definition of deflate_state, with the |
||||||
|
;;; assumption that the compiler will dword-align the fields. (Thus, |
||||||
|
;;; changing the definition of deflate_state could easily cause this |
||||||
|
;;; program to crash horribly, without so much as a warning at |
||||||
|
;;; compile time. Sigh.) |
||||||
|
|
||||||
|
dsWSize equ 36+zlib1222add |
||||||
|
dsWMask equ 44+zlib1222add |
||||||
|
dsWindow equ 48+zlib1222add |
||||||
|
dsPrev equ 56+zlib1222add |
||||||
|
dsMatchLen equ 88+zlib1222add |
||||||
|
dsPrevMatch equ 92+zlib1222add |
||||||
|
dsStrStart equ 100+zlib1222add |
||||||
|
dsMatchStart equ 104+zlib1222add |
||||||
|
dsLookahead equ 108+zlib1222add |
||||||
|
dsPrevLen equ 112+zlib1222add |
||||||
|
dsMaxChainLen equ 116+zlib1222add |
||||||
|
dsGoodMatch equ 132+zlib1222add |
||||||
|
dsNiceMatch equ 136+zlib1222add |
||||||
|
|
||||||
|
|
||||||
|
;;; match686.asm -- Pentium-Pro-optimized version of longest_match() |
||||||
|
;;; Written for zlib 1.1.2 |
||||||
|
;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
||||||
|
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||||
|
;;; |
||||||
|
;; |
||||||
|
;; This software is provided 'as-is', without any express or implied |
||||||
|
;; warranty. In no event will the authors be held liable for any damages |
||||||
|
;; arising from the use of this software. |
||||||
|
;; |
||||||
|
;; Permission is granted to anyone to use this software for any purpose, |
||||||
|
;; including commercial applications, and to alter it and redistribute it |
||||||
|
;; freely, subject to the following restrictions: |
||||||
|
;; |
||||||
|
;; 1. The origin of this software must not be misrepresented; you must not |
||||||
|
;; claim that you wrote the original software. If you use this software |
||||||
|
;; in a product, an acknowledgment in the product documentation would be |
||||||
|
;; appreciated but is not required. |
||||||
|
;; 2. Altered source versions must be plainly marked as such, and must not be |
||||||
|
;; misrepresented as being the original software |
||||||
|
;; 3. This notice may not be removed or altered from any source distribution. |
||||||
|
;; |
||||||
|
|
||||||
|
;GLOBAL _longest_match, _match_init |
||||||
|
|
||||||
|
|
||||||
|
;SECTION .text |
||||||
|
|
||||||
|
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
||||||
|
|
||||||
|
;_longest_match: |
||||||
|
IFDEF NOUNDERLINE |
||||||
|
longest_match proc near |
||||||
|
ELSE |
||||||
|
_longest_match proc near |
||||||
|
ENDIF |
||||||
|
|
||||||
|
;;; Save registers that the compiler may be using, and adjust esp to |
||||||
|
;;; make room for our stack frame. |
||||||
|
|
||||||
|
push ebp |
||||||
|
push edi |
||||||
|
push esi |
||||||
|
push ebx |
||||||
|
sub esp, LocalVarsSize |
||||||
|
|
||||||
|
;;; Retrieve the function arguments. ecx will hold cur_match |
||||||
|
;;; throughout the entire function. edx will hold the pointer to the |
||||||
|
;;; deflate_state structure during the function's setup (before |
||||||
|
;;; entering the main loop). |
||||||
|
|
||||||
|
mov edx, [deflatestate] |
||||||
|
mov ecx, [curmatch] |
||||||
|
|
||||||
|
;;; uInt wmask = s->w_mask; |
||||||
|
;;; unsigned chain_length = s->max_chain_length; |
||||||
|
;;; if (s->prev_length >= s->good_match) { |
||||||
|
;;; chain_length >>= 2; |
||||||
|
;;; } |
||||||
|
|
||||||
|
mov eax, [edx + dsPrevLen] |
||||||
|
mov ebx, [edx + dsGoodMatch] |
||||||
|
cmp eax, ebx |
||||||
|
mov eax, [edx + dsWMask] |
||||||
|
mov ebx, [edx + dsMaxChainLen] |
||||||
|
jl LastMatchGood |
||||||
|
shr ebx, 2 |
||||||
|
LastMatchGood: |
||||||
|
|
||||||
|
;;; chainlen is decremented once beforehand so that the function can |
||||||
|
;;; use the sign flag instead of the zero flag for the exit test. |
||||||
|
;;; It is then shifted into the high word, to make room for the wmask |
||||||
|
;;; value, which it will always accompany. |
||||||
|
|
||||||
|
dec ebx |
||||||
|
shl ebx, 16 |
||||||
|
or ebx, eax |
||||||
|
mov [chainlenwmask], ebx |
||||||
|
|
||||||
|
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
||||||
|
|
||||||
|
mov eax, [edx + dsNiceMatch] |
||||||
|
mov ebx, [edx + dsLookahead] |
||||||
|
cmp ebx, eax |
||||||
|
jl LookaheadLess |
||||||
|
mov ebx, eax |
||||||
|
LookaheadLess: mov [nicematch], ebx |
||||||
|
|
||||||
|
;;; register Bytef *scan = s->window + s->strstart; |
||||||
|
|
||||||
|
mov esi, [edx + dsWindow] |
||||||
|
mov [window], esi |
||||||
|
mov ebp, [edx + dsStrStart] |
||||||
|
lea edi, [esi + ebp] |
||||||
|
mov [scan], edi |
||||||
|
|
||||||
|
;;; Determine how many bytes the scan ptr is off from being |
||||||
|
;;; dword-aligned. |
||||||
|
|
||||||
|
mov eax, edi |
||||||
|
neg eax |
||||||
|
and eax, 3 |
||||||
|
mov [scanalign], eax |
||||||
|
|
||||||
|
;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
||||||
|
;;; s->strstart - (IPos)MAX_DIST(s) : NIL; |
||||||
|
|
||||||
|
mov eax, [edx + dsWSize] |
||||||
|
sub eax, MIN_LOOKAHEAD |
||||||
|
sub ebp, eax |
||||||
|
jg LimitPositive |
||||||
|
xor ebp, ebp |
||||||
|
LimitPositive: |
||||||
|
|
||||||
|
;;; int best_len = s->prev_length; |
||||||
|
|
||||||
|
mov eax, [edx + dsPrevLen] |
||||||
|
mov [bestlen], eax |
||||||
|
|
||||||
|
;;; Store the sum of s->window + best_len in esi, and locally in windowbestlen. |
||||||
|
|
||||||
|
add esi, eax |
||||||
|
mov [windowbestlen], esi |
||||||
|
|
||||||
|
;;; register ush scan_start = *(ushf*)scan; |
||||||
|
;;; register ush scan_end = *(ushf*)(scan+best_len-1); |
||||||
|
;;; Posf *prev = s->prev; |
||||||
|
|
||||||
|
movzx ebx, word ptr [edi] |
||||||
|
mov [scanstart], ebx |
||||||
|
movzx ebx, word ptr [edi + eax - 1] |
||||||
|
mov [scanend], ebx |
||||||
|
mov edi, [edx + dsPrev] |
||||||
|
|
||||||
|
;;; Jump into the main loop. |
||||||
|
|
||||||
|
mov edx, [chainlenwmask] |
||||||
|
jmp short LoopEntry |
||||||
|
|
||||||
|
align 4 |
||||||
|
|
||||||
|
;;; do { |
||||||
|
;;; match = s->window + cur_match; |
||||||
|
;;; if (*(ushf*)(match+best_len-1) != scan_end || |
||||||
|
;;; *(ushf*)match != scan_start) continue; |
||||||
|
;;; [...] |
||||||
|
;;; } while ((cur_match = prev[cur_match & wmask]) > limit |
||||||
|
;;; && --chain_length != 0); |
||||||
|
;;; |
||||||
|
;;; Here is the inner loop of the function. The function will spend the |
||||||
|
;;; majority of its time in this loop, and the majority of that time will |
||||||
|
;;; be spent in the first ten instructions. |
||||||
|
;;; |
||||||
|
;;; Within this loop: |
||||||
|
;;; ebx = scanend |
||||||
|
;;; ecx = curmatch |
||||||
|
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) |
||||||
|
;;; esi = windowbestlen - i.e., (window + bestlen) |
||||||
|
;;; edi = prev |
||||||
|
;;; ebp = limit |
||||||
|
|
||||||
|
LookupLoop: |
||||||
|
and ecx, edx |
||||||
|
movzx ecx, word ptr [edi + ecx*2] |
||||||
|
cmp ecx, ebp |
||||||
|
jbe LeaveNow |
||||||
|
sub edx, 00010000h |
||||||
|
js LeaveNow |
||||||
|
LoopEntry: movzx eax, word ptr [esi + ecx - 1] |
||||||
|
cmp eax, ebx |
||||||
|
jnz LookupLoop |
||||||
|
mov eax, [window] |
||||||
|
movzx eax, word ptr [eax + ecx] |
||||||
|
cmp eax, [scanstart] |
||||||
|
jnz LookupLoop |
||||||
|
|
||||||
|
;;; Store the current value of chainlen. |
||||||
|
|
||||||
|
mov [chainlenwmask], edx |
||||||
|
|
||||||
|
;;; Point edi to the string under scrutiny, and esi to the string we |
||||||
|
;;; are hoping to match it up with. In actuality, esi and edi are |
||||||
|
;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is |
||||||
|
;;; initialized to -MAX_MATCH_8. |
||||||
|
|
||||||
|
mov esi, [window] |
||||||
|
mov edi, [scan] |
||||||
|
add esi, ecx |
||||||
|
mov eax, [scanalign] |
||||||
|
mov edx, 0fffffef8h; -(MAX_MATCH_8) |
||||||
|
lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] |
||||||
|
lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] |
||||||
|
|
||||||
|
;;; Test the strings for equality, 8 bytes at a time. At the end, |
||||||
|
;;; adjust edx so that it is offset to the exact byte that mismatched. |
||||||
|
;;; |
||||||
|
;;; We already know at this point that the first three bytes of the |
||||||
|
;;; strings match each other, and they can be safely passed over before |
||||||
|
;;; starting the compare loop. So what this code does is skip over 0-3 |
||||||
|
;;; bytes, as much as necessary in order to dword-align the edi |
||||||
|
;;; pointer. (esi will still be misaligned three times out of four.) |
||||||
|
;;; |
||||||
|
;;; It should be confessed that this loop usually does not represent |
||||||
|
;;; much of the total running time. Replacing it with a more |
||||||
|
;;; straightforward "rep cmpsb" would not drastically degrade |
||||||
|
;;; performance. |
||||||
|
|
||||||
|
LoopCmps: |
||||||
|
mov eax, [esi + edx] |
||||||
|
xor eax, [edi + edx] |
||||||
|
jnz LeaveLoopCmps |
||||||
|
mov eax, [esi + edx + 4] |
||||||
|
xor eax, [edi + edx + 4] |
||||||
|
jnz LeaveLoopCmps4 |
||||||
|
add edx, 8 |
||||||
|
jnz LoopCmps |
||||||
|
jmp short LenMaximum |
||||||
|
LeaveLoopCmps4: add edx, 4 |
||||||
|
LeaveLoopCmps: test eax, 0000FFFFh |
||||||
|
jnz LenLower |
||||||
|
add edx, 2 |
||||||
|
shr eax, 16 |
||||||
|
LenLower: sub al, 1 |
||||||
|
adc edx, 0 |
||||||
|
|
||||||
|
;;; Calculate the length of the match. If it is MAX_MATCH or longer, |
||||||
|
;;; then automatically accept it as the best possible match and leave. |
||||||
|
|
||||||
|
lea eax, [edi + edx] |
||||||
|
mov edi, [scan] |
||||||
|
sub eax, edi |
||||||
|
cmp eax, MAX_MATCH |
||||||
|
jge LenMaximum |
||||||
|
|
||||||
|
;;; If the length of the match is not longer than the best match we |
||||||
|
;;; have so far, then forget it and return to the lookup loop. |
||||||
|
|
||||||
|
mov edx, [deflatestate] |
||||||
|
mov ebx, [bestlen] |
||||||
|
cmp eax, ebx |
||||||
|
jg LongerMatch |
||||||
|
mov esi, [windowbestlen] |
||||||
|
mov edi, [edx + dsPrev] |
||||||
|
mov ebx, [scanend] |
||||||
|
mov edx, [chainlenwmask] |
||||||
|
jmp LookupLoop |
||||||
|
|
||||||
|
;;; s->match_start = cur_match; |
||||||
|
;;; best_len = len; |
||||||
|
;;; if (len >= nice_match) break; |
||||||
|
;;; scan_end = *(ushf*)(scan+best_len-1); |
||||||
|
|
||||||
|
LongerMatch: mov ebx, [nicematch] |
||||||
|
mov [bestlen], eax |
||||||
|
mov [edx + dsMatchStart], ecx |
||||||
|
cmp eax, ebx |
||||||
|
jge LeaveNow |
||||||
|
mov esi, [window] |
||||||
|
add esi, eax |
||||||
|
mov [windowbestlen], esi |
||||||
|
movzx ebx, word ptr [edi + eax - 1] |
||||||
|
mov edi, [edx + dsPrev] |
||||||
|
mov [scanend], ebx |
||||||
|
mov edx, [chainlenwmask] |
||||||
|
jmp LookupLoop |
||||||
|
|
||||||
|
;;; Accept the current string, with the maximum possible length. |
||||||
|
|
||||||
|
LenMaximum: mov edx, [deflatestate] |
||||||
|
mov dword ptr [bestlen], MAX_MATCH |
||||||
|
mov [edx + dsMatchStart], ecx |
||||||
|
|
||||||
|
;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; |
||||||
|
;;; return s->lookahead; |
||||||
|
|
||||||
|
LeaveNow: |
||||||
|
mov edx, [deflatestate] |
||||||
|
mov ebx, [bestlen] |
||||||
|
mov eax, [edx + dsLookahead] |
||||||
|
cmp ebx, eax |
||||||
|
jg LookaheadRet |
||||||
|
mov eax, ebx |
||||||
|
LookaheadRet: |
||||||
|
|
||||||
|
;;; Restore the stack and return from whence we came. |
||||||
|
|
||||||
|
add esp, LocalVarsSize |
||||||
|
pop ebx |
||||||
|
pop esi |
||||||
|
pop edi |
||||||
|
pop ebp |
||||||
|
|
||||||
|
ret |
||||||
|
; please don't remove this string ! |
||||||
|
; You can freely use match686 in any free or commercial app if you don't remove the string in the binary! |
||||||
|
db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah |
||||||
|
|
||||||
|
|
||||||
|
IFDEF NOUNDERLINE |
||||||
|
longest_match endp |
||||||
|
ELSE |
||||||
|
_longest_match endp |
||||||
|
ENDIF |
||||||
|
|
||||||
|
; match_init: does nothing except return to its caller. |
; The procedure is assembled as match_init when NOUNDERLINE is defined, |
; and as _match_init otherwise. |
IFDEF NOUNDERLINE |
||||||
|
match_init proc near |
||||||
|
ret |
||||||
|
match_init endp |
||||||
|
ELSE |
||||||
|
_match_init proc near |
||||||
|
ret |
||||||
|
_match_init endp |
||||||
|
ENDIF |
||||||
|
|
||||||
|
|
||||||
|
_TEXT ends |
||||||
|
end |
Binary file not shown.
@ -1,3 +0,0 @@ |
|||||||
cl /DASMV /I..\.. /O2 /c gvmat32c.c |
|
||||||
ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm |
|
||||||
ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
|
@ -1,114 +0,0 @@ |
|||||||
|
|
||||||
VERSION 1.23 |
|
||||||
|
|
||||||
HEAPSIZE 1048576,8192 |
|
||||||
|
|
||||||
EXPORTS |
|
||||||
adler32 @1 |
|
||||||
compress @2 |
|
||||||
crc32 @3 |
|
||||||
deflate @4 |
|
||||||
deflateCopy @5 |
|
||||||
deflateEnd @6 |
|
||||||
deflateInit2_ @7 |
|
||||||
deflateInit_ @8 |
|
||||||
deflateParams @9 |
|
||||||
deflateReset @10 |
|
||||||
deflateSetDictionary @11 |
|
||||||
gzclose @12 |
|
||||||
gzdopen @13 |
|
||||||
gzerror @14 |
|
||||||
gzflush @15 |
|
||||||
gzopen @16 |
|
||||||
gzread @17 |
|
||||||
gzwrite @18 |
|
||||||
inflate @19 |
|
||||||
inflateEnd @20 |
|
||||||
inflateInit2_ @21 |
|
||||||
inflateInit_ @22 |
|
||||||
inflateReset @23 |
|
||||||
inflateSetDictionary @24 |
|
||||||
inflateSync @25 |
|
||||||
uncompress @26 |
|
||||||
zlibVersion @27 |
|
||||||
gzprintf @28 |
|
||||||
gzputc @29 |
|
||||||
gzgetc @30 |
|
||||||
gzseek @31 |
|
||||||
gzrewind @32 |
|
||||||
gztell @33 |
|
||||||
gzeof @34 |
|
||||||
gzsetparams @35 |
|
||||||
zError @36 |
|
||||||
inflateSyncPoint @37 |
|
||||||
get_crc_table @38 |
|
||||||
compress2 @39 |
|
||||||
gzputs @40 |
|
||||||
gzgets @41 |
|
||||||
inflateCopy @42 |
|
||||||
inflateBackInit_ @43 |
|
||||||
inflateBack @44 |
|
||||||
inflateBackEnd @45 |
|
||||||
compressBound @46 |
|
||||||
deflateBound @47 |
|
||||||
gzclearerr @48 |
|
||||||
gzungetc @49 |
|
||||||
zlibCompileFlags @50 |
|
||||||
deflatePrime @51 |
|
||||||
|
|
||||||
unzOpen @61 |
|
||||||
unzClose @62 |
|
||||||
unzGetGlobalInfo @63 |
|
||||||
unzGetCurrentFileInfo @64 |
|
||||||
unzGoToFirstFile @65 |
|
||||||
unzGoToNextFile @66 |
|
||||||
unzOpenCurrentFile @67 |
|
||||||
unzReadCurrentFile @68 |
|
||||||
unzOpenCurrentFile3 @69 |
|
||||||
unztell @70 |
|
||||||
unzeof @71 |
|
||||||
unzCloseCurrentFile @72 |
|
||||||
unzGetGlobalComment @73 |
|
||||||
unzStringFileNameCompare @74 |
|
||||||
unzLocateFile @75 |
|
||||||
unzGetLocalExtrafield @76 |
|
||||||
unzOpen2 @77 |
|
||||||
unzOpenCurrentFile2 @78 |
|
||||||
unzOpenCurrentFilePassword @79 |
|
||||||
|
|
||||||
zipOpen @80 |
|
||||||
zipOpenNewFileInZip @81 |
|
||||||
zipWriteInFileInZip @82 |
|
||||||
zipCloseFileInZip @83 |
|
||||||
zipClose @84 |
|
||||||
zipOpenNewFileInZip2 @86 |
|
||||||
zipCloseFileInZipRaw @87 |
|
||||||
zipOpen2 @88 |
|
||||||
zipOpenNewFileInZip3 @89 |
|
||||||
|
|
||||||
unzGetFilePos @100 |
|
||||||
unzGoToFilePos @101 |
|
||||||
|
|
||||||
fill_win32_filefunc @110 |
|
||||||
fill_win32_filefunc64 @111 |
|
||||||
fill_win32_filefunc64A @112 |
|
||||||
fill_win32_filefunc64W @113 |
|
||||||
|
|
||||||
; quick hack by hkuno@microhouse.co.jp |
|
||||||
unzOpen64 @120 |
|
||||||
unzOpen2_64 @121 |
|
||||||
unzGetGlobalInfo64 @122 |
|
||||||
unzGetCurrentFileInfo64 @124 |
|
||||||
unzGetCurrentFileZStreamPos64 @125 |
|
||||||
unztell64 @126 |
|
||||||
unzGetFilePos64 @127 |
|
||||||
unzGoToFilePos64 @128 |
|
||||||
|
|
||||||
zipOpen64 @130 |
|
||||||
zipOpen2_64 @131 |
|
||||||
zipOpenNewFileInZip64 @132 |
|
||||||
zipOpenNewFileInZip2_64 @133 |
|
||||||
zipOpenNewFileInZip3_64 @134 |
|
||||||
zipOpenNewFileInZip4_64 @135 |
|
||||||
zipCloseFileInZipRaw64 @136 |
|
||||||
; end hack |
|
@ -1,114 +0,0 @@ |
|||||||
|
|
||||||
VERSION 1.23 |
|
||||||
|
|
||||||
HEAPSIZE 1048576,8192 |
|
||||||
|
|
||||||
EXPORTS |
|
||||||
adler32 @1 |
|
||||||
compress @2 |
|
||||||
crc32 @3 |
|
||||||
deflate @4 |
|
||||||
deflateCopy @5 |
|
||||||
deflateEnd @6 |
|
||||||
deflateInit2_ @7 |
|
||||||
deflateInit_ @8 |
|
||||||
deflateParams @9 |
|
||||||
deflateReset @10 |
|
||||||
deflateSetDictionary @11 |
|
||||||
gzclose @12 |
|
||||||
gzdopen @13 |
|
||||||
gzerror @14 |
|
||||||
gzflush @15 |
|
||||||
gzopen @16 |
|
||||||
gzread @17 |
|
||||||
gzwrite @18 |
|
||||||
inflate @19 |
|
||||||
inflateEnd @20 |
|
||||||
inflateInit2_ @21 |
|
||||||
inflateInit_ @22 |
|
||||||
inflateReset @23 |
|
||||||
inflateSetDictionary @24 |
|
||||||
inflateSync @25 |
|
||||||
uncompress @26 |
|
||||||
zlibVersion @27 |
|
||||||
gzprintf @28 |
|
||||||
gzputc @29 |
|
||||||
gzgetc @30 |
|
||||||
gzseek @31 |
|
||||||
gzrewind @32 |
|
||||||
gztell @33 |
|
||||||
gzeof @34 |
|
||||||
gzsetparams @35 |
|
||||||
zError @36 |
|
||||||
inflateSyncPoint @37 |
|
||||||
get_crc_table @38 |
|
||||||
compress2 @39 |
|
||||||
gzputs @40 |
|
||||||
gzgets @41 |
|
||||||
inflateCopy @42 |
|
||||||
inflateBackInit_ @43 |
|
||||||
inflateBack @44 |
|
||||||
inflateBackEnd @45 |
|
||||||
compressBound @46 |
|
||||||
deflateBound @47 |
|
||||||
gzclearerr @48 |
|
||||||
gzungetc @49 |
|
||||||
zlibCompileFlags @50 |
|
||||||
deflatePrime @51 |
|
||||||
|
|
||||||
unzOpen @61 |
|
||||||
unzClose @62 |
|
||||||
unzGetGlobalInfo @63 |
|
||||||
unzGetCurrentFileInfo @64 |
|
||||||
unzGoToFirstFile @65 |
|
||||||
unzGoToNextFile @66 |
|
||||||
unzOpenCurrentFile @67 |
|
||||||
unzReadCurrentFile @68 |
|
||||||
unzOpenCurrentFile3 @69 |
|
||||||
unztell @70 |
|
||||||
unzeof @71 |
|
||||||
unzCloseCurrentFile @72 |
|
||||||
unzGetGlobalComment @73 |
|
||||||
unzStringFileNameCompare @74 |
|
||||||
unzLocateFile @75 |
|
||||||
unzGetLocalExtrafield @76 |
|
||||||
unzOpen2 @77 |
|
||||||
unzOpenCurrentFile2 @78 |
|
||||||
unzOpenCurrentFilePassword @79 |
|
||||||
|
|
||||||
zipOpen @80 |
|
||||||
zipOpenNewFileInZip @81 |
|
||||||
zipWriteInFileInZip @82 |
|
||||||
zipCloseFileInZip @83 |
|
||||||
zipClose @84 |
|
||||||
zipOpenNewFileInZip2 @86 |
|
||||||
zipCloseFileInZipRaw @87 |
|
||||||
zipOpen2 @88 |
|
||||||
zipOpenNewFileInZip3 @89 |
|
||||||
|
|
||||||
unzGetFilePos @100 |
|
||||||
unzGoToFilePos @101 |
|
||||||
|
|
||||||
fill_win32_filefunc @110 |
|
||||||
fill_win32_filefunc64 @111 |
|
||||||
fill_win32_filefunc64A @112 |
|
||||||
fill_win32_filefunc64W @113 |
|
||||||
|
|
||||||
; quick hack by hkuno@microhouse.co.jp |
|
||||||
unzOpen64 @120 |
|
||||||
unzOpen2_64 @121 |
|
||||||
unzGetGlobalInfo64 @122 |
|
||||||
unzGetCurrentFileInfo64 @124 |
|
||||||
unzGetCurrentFileZStreamPos64 @125 |
|
||||||
unztell64 @126 |
|
||||||
unzGetFilePos64 @127 |
|
||||||
unzGoToFilePos64 @128 |
|
||||||
|
|
||||||
zipOpen64 @130 |
|
||||||
zipOpen2_64 @131 |
|
||||||
zipOpenNewFileInZip64 @132 |
|
||||||
zipOpenNewFileInZip2_64 @133 |
|
||||||
zipOpenNewFileInZip3_64 @134 |
|
||||||
zipOpenNewFileInZip4_64 @135 |
|
||||||
zipCloseFileInZipRaw64 @136 |
|
||||||
; end hack |
|
@ -0,0 +1,416 @@ |
|||||||
|
/* zconf.h -- configuration of the zlib compression library
|
||||||
|
* Copyright (C) 1995-2010 Jean-loup Gailly. |
||||||
|
* For conditions of distribution and use, see copyright notice in zlib.h |
||||||
|
*/ |
||||||
|
|
||||||
|
/* @(#) $Id$ */ |
||||||
|
|
||||||
|
#ifndef ZCONF_H |
||||||
|
#define ZCONF_H |
||||||
|
|
||||||
|
/*
|
||||||
|
* If you *really* need a unique prefix for all types and library functions, |
||||||
|
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. |
||||||
|
* Even better than compiling with -DZ_PREFIX would be to use configure to set |
||||||
|
* this permanently in zconf.h using "./configure --zprefix". |
||||||
|
*/ |
||||||
|
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ |
||||||
|
|
||||||
|
/* all linked symbols */ |
||||||
|
# define _dist_code z__dist_code |
||||||
|
# define _length_code z__length_code |
||||||
|
# define _tr_align z__tr_align |
||||||
|
# define _tr_flush_block z__tr_flush_block |
||||||
|
# define _tr_init z__tr_init |
||||||
|
# define _tr_stored_block z__tr_stored_block |
||||||
|
# define _tr_tally z__tr_tally |
||||||
|
# define adler32 z_adler32 |
||||||
|
# define adler32_combine z_adler32_combine |
||||||
|
# define adler32_combine64 z_adler32_combine64 |
||||||
|
# define compress z_compress |
||||||
|
# define compress2 z_compress2 |
||||||
|
# define compressBound z_compressBound |
||||||
|
# define crc32 z_crc32 |
||||||
|
# define crc32_combine z_crc32_combine |
||||||
|
# define crc32_combine64 z_crc32_combine64 |
||||||
|
# define deflate z_deflate |
||||||
|
# define deflateBound z_deflateBound |
||||||
|
# define deflateCopy z_deflateCopy |
||||||
|
# define deflateEnd z_deflateEnd |
||||||
|
# define deflateInit2_ z_deflateInit2_ |
||||||
|
# define deflateInit_ z_deflateInit_ |
||||||
|
# define deflateParams z_deflateParams |
||||||
|
# define deflatePrime z_deflatePrime |
||||||
|
# define deflateReset z_deflateReset |
||||||
|
# define deflateSetDictionary z_deflateSetDictionary |
||||||
|
# define deflateSetHeader z_deflateSetHeader |
||||||
|
# define deflateTune z_deflateTune |
||||||
|
# define deflate_copyright z_deflate_copyright |
||||||
|
# define get_crc_table z_get_crc_table |
||||||
|
# define gz_error z_gz_error |
||||||
|
# define gz_intmax z_gz_intmax |
||||||
|
# define gz_strwinerror z_gz_strwinerror |
||||||
|
# define gzbuffer z_gzbuffer |
||||||
|
# define gzclearerr z_gzclearerr |
||||||
|
# define gzclose z_gzclose |
||||||
|
# define gzclose_r z_gzclose_r |
||||||
|
# define gzclose_w z_gzclose_w |
||||||
|
# define gzdirect z_gzdirect |
||||||
|
# define gzdopen z_gzdopen |
||||||
|
# define gzeof z_gzeof |
||||||
|
# define gzerror z_gzerror |
||||||
|
# define gzflush z_gzflush |
||||||
|
# define gzgetc z_gzgetc |
||||||
|
# define gzgets z_gzgets |
||||||
|
# define gzoffset z_gzoffset |
||||||
|
# define gzoffset64 z_gzoffset64 |
||||||
|
# define gzopen z_gzopen |
||||||
|
# define gzopen64 z_gzopen64 |
||||||
|
# define gzprintf z_gzprintf |
||||||
|
# define gzputc z_gzputc |
||||||
|
# define gzputs z_gzputs |
||||||
|
# define gzread z_gzread |
||||||
|
# define gzrewind z_gzrewind |
||||||
|
# define gzseek z_gzseek |
||||||
|
# define gzseek64 z_gzseek64 |
||||||
|
# define gzsetparams z_gzsetparams |
||||||
|
# define gztell z_gztell |
||||||
|
# define gztell64 z_gztell64 |
||||||
|
# define gzungetc z_gzungetc |
||||||
|
# define gzwrite z_gzwrite |
||||||
|
# define inflate z_inflate |
||||||
|
# define inflateBack z_inflateBack |
||||||
|
# define inflateBackEnd z_inflateBackEnd |
||||||
|
# define inflateBackInit_ z_inflateBackInit_ |
||||||
|
# define inflateCopy z_inflateCopy |
||||||
|
# define inflateEnd z_inflateEnd |
||||||
|
# define inflateGetHeader z_inflateGetHeader |
||||||
|
# define inflateInit2_ z_inflateInit2_ |
||||||
|
# define inflateInit_ z_inflateInit_ |
||||||
|
# define inflateMark z_inflateMark |
||||||
|
# define inflatePrime z_inflatePrime |
||||||
|
# define inflateReset z_inflateReset |
||||||
|
# define inflateReset2 z_inflateReset2 |
||||||
|
# define inflateSetDictionary z_inflateSetDictionary |
||||||
|
# define inflateSync z_inflateSync |
||||||
|
# define inflateSyncPoint z_inflateSyncPoint |
||||||
|
# define inflateUndermine z_inflateUndermine |
||||||
|
# define inflate_copyright z_inflate_copyright |
||||||
|
# define inflate_fast z_inflate_fast |
||||||
|
# define inflate_table z_inflate_table |
||||||
|
# define uncompress z_uncompress |
||||||
|
# define zError z_zError |
||||||
|
# define zcalloc z_zcalloc |
||||||
|
# define zcfree z_zcfree |
||||||
|
# define zlibCompileFlags z_zlibCompileFlags |
||||||
|
# define zlibVersion z_zlibVersion |
||||||
|
|
||||||
|
/* all zlib typedefs in zlib.h and zconf.h */ |
||||||
|
# define Byte z_Byte |
||||||
|
# define Bytef z_Bytef |
||||||
|
# define alloc_func z_alloc_func |
||||||
|
# define charf z_charf |
||||||
|
# define free_func z_free_func |
||||||
|
# define gzFile z_gzFile |
||||||
|
# define gz_header z_gz_header |
||||||
|
# define gz_headerp z_gz_headerp |
||||||
|
# define in_func z_in_func |
||||||
|
# define intf z_intf |
||||||
|
# define out_func z_out_func |
||||||
|
# define uInt z_uInt |
||||||
|
# define uIntf z_uIntf |
||||||
|
# define uLong z_uLong |
||||||
|
# define uLongf z_uLongf |
||||||
|
# define voidp z_voidp |
||||||
|
# define voidpc z_voidpc |
||||||
|
# define voidpf z_voidpf |
||||||
|
|
||||||
|
/* all zlib structs in zlib.h and zconf.h */ |
||||||
|
# define gz_header_s z_gz_header_s |
||||||
|
# define internal_state z_internal_state |
||||||
|
|
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__MSDOS__) && !defined(MSDOS) |
||||||
|
# define MSDOS |
||||||
|
#endif |
||||||
|
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) |
||||||
|
# define OS2 |
||||||
|
#endif |
||||||
|
#if defined(_WINDOWS) && !defined(WINDOWS) |
||||||
|
# define WINDOWS |
||||||
|
#endif |
||||||
|
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) |
||||||
|
# ifndef WIN32 |
||||||
|
# define WIN32 |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) |
||||||
|
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) |
||||||
|
# ifndef SYS16BIT |
||||||
|
# define SYS16BIT |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/*
|
||||||
|
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more |
||||||
|
* than 64k bytes at a time (needed on systems with 16-bit int). |
||||||
|
*/ |
||||||
|
#ifdef SYS16BIT |
||||||
|
# define MAXSEG_64K |
||||||
|
#endif |
||||||
|
#ifdef MSDOS |
||||||
|
# define UNALIGNED_OK |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef __STDC_VERSION__ |
||||||
|
# ifndef STDC |
||||||
|
# define STDC |
||||||
|
# endif |
||||||
|
# if __STDC_VERSION__ >= 199901L |
||||||
|
# ifndef STDC99 |
||||||
|
# define STDC99 |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef STDC |
||||||
|
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ |
||||||
|
# define const /* note: need a more gentle solution here */ |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Some Mac compilers merge all .h files incorrectly: */ |
||||||
|
#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) |
||||||
|
# define NO_DUMMY_DECL |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Maximum value for memLevel in deflateInit2 */ |
||||||
|
#ifndef MAX_MEM_LEVEL |
||||||
|
# ifdef MAXSEG_64K |
||||||
|
# define MAX_MEM_LEVEL 8 |
||||||
|
# else |
||||||
|
# define MAX_MEM_LEVEL 9 |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
|
||||||
|
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files |
||||||
|
* created by gzip. (Files created by minigzip can still be extracted by |
||||||
|
* gzip.) |
||||||
|
*/ |
||||||
|
#ifndef MAX_WBITS |
||||||
|
# define MAX_WBITS 15 /* 32K LZ77 window */ |
||||||
|
#endif |
||||||
|
|
||||||
|
/* The memory requirements for deflate are (in bytes):
|
||||||
|
(1 << (windowBits+2)) + (1 << (memLevel+9)) |
||||||
|
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) |
||||||
|
plus a few kilobytes for small objects. For example, if you want to reduce |
||||||
|
the default memory requirements from 256K to 128K, compile with |
||||||
|
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" |
||||||
|
Of course this will generally degrade compression (there's no free lunch). |
||||||
|
|
||||||
|
The memory requirements for inflate are (in bytes) 1 << windowBits |
||||||
|
that is, 32K for windowBits=15 (default value) plus a few kilobytes |
||||||
|
for small objects. |
||||||
|
*/ |
||||||
|
|
||||||
|
/* Type declarations */ |
||||||
|
|
||||||
|
#ifndef OF /* function prototypes */ |
||||||
|
# ifdef STDC |
||||||
|
# define OF(args) args |
||||||
|
# else |
||||||
|
# define OF(args) () |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* The following definitions for FAR are needed only for MSDOS mixed
|
||||||
|
* model programming (small or medium model with some far allocations). |
||||||
|
* This was tested only with MSC; for other MSDOS compilers you may have |
||||||
|
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model, |
||||||
|
* just define FAR to be empty. |
||||||
|
*/ |
||||||
|
#ifdef SYS16BIT |
||||||
|
# if defined(M_I86SM) || defined(M_I86MM) |
||||||
|
/* MSC small or medium model */ |
||||||
|
# define SMALL_MEDIUM |
||||||
|
# ifdef _MSC_VER |
||||||
|
# define FAR _far |
||||||
|
# else |
||||||
|
# define FAR far |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
# if (defined(__SMALL__) || defined(__MEDIUM__)) |
||||||
|
/* Turbo C small or medium model */ |
||||||
|
# define SMALL_MEDIUM |
||||||
|
# ifdef __BORLANDC__ |
||||||
|
# define FAR _far |
||||||
|
# else |
||||||
|
# define FAR far |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(WINDOWS) || defined(WIN32) |
||||||
|
/* If building or using zlib as a DLL, define ZLIB_DLL.
|
||||||
|
* This is not mandatory, but it offers a little performance increase. |
||||||
|
*/ |
||||||
|
# ifdef ZLIB_DLL |
||||||
|
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) |
||||||
|
# ifdef ZLIB_INTERNAL |
||||||
|
# define ZEXTERN extern __declspec(dllexport) |
||||||
|
# else |
||||||
|
# define ZEXTERN extern __declspec(dllimport) |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
# endif /* ZLIB_DLL */ |
||||||
|
/* If building or using zlib with the WINAPI/WINAPIV calling convention,
|
||||||
|
* define ZLIB_WINAPI. |
||||||
|
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. |
||||||
|
*/ |
||||||
|
# ifdef ZLIB_WINAPI |
||||||
|
# ifdef FAR |
||||||
|
# undef FAR |
||||||
|
# endif |
||||||
|
# include <windows.h> |
||||||
|
/* No need for _export, use ZLIB.DEF instead. */ |
||||||
|
/* For complete Windows compatibility, use WINAPI, not __stdcall. */ |
||||||
|
# define ZEXPORT WINAPI |
||||||
|
# ifdef WIN32 |
||||||
|
# define ZEXPORTVA WINAPIV |
||||||
|
# else |
||||||
|
# define ZEXPORTVA FAR CDECL |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined (__BEOS__) |
||||||
|
# ifdef ZLIB_DLL |
||||||
|
# ifdef ZLIB_INTERNAL |
||||||
|
# define ZEXPORT __declspec(dllexport) |
||||||
|
# define ZEXPORTVA __declspec(dllexport) |
||||||
|
# else |
||||||
|
# define ZEXPORT __declspec(dllimport) |
||||||
|
# define ZEXPORTVA __declspec(dllimport) |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef HAVE_VISIBILITY_PRAGMA |
||||||
|
# define ZEXTERN __attribute__((visibility ("default"))) extern |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef ZEXTERN |
||||||
|
# define ZEXTERN extern |
||||||
|
#endif |
||||||
|
#ifndef ZEXPORT |
||||||
|
# define ZEXPORT |
||||||
|
#endif |
||||||
|
#ifndef ZEXPORTVA |
||||||
|
# define ZEXPORTVA |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef FAR |
||||||
|
# define FAR |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !defined(__MACTYPES__) |
||||||
|
typedef unsigned char Byte; /* 8 bits */ |
||||||
|
#endif |
||||||
|
typedef unsigned int uInt; /* 16 bits or more */ |
||||||
|
typedef unsigned long uLong; /* 32 bits or more */ |
||||||
|
|
||||||
|
#ifdef SMALL_MEDIUM |
||||||
|
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ |
||||||
|
# define Bytef Byte FAR |
||||||
|
#else |
||||||
|
typedef Byte FAR Bytef; |
||||||
|
#endif |
||||||
|
typedef char FAR charf; |
||||||
|
typedef int FAR intf; |
||||||
|
typedef uInt FAR uIntf; |
||||||
|
typedef uLong FAR uLongf; |
||||||
|
|
||||||
|
#ifdef STDC |
||||||
|
typedef void const *voidpc; |
||||||
|
typedef void FAR *voidpf; |
||||||
|
typedef void *voidp; |
||||||
|
#else |
||||||
|
typedef Byte const *voidpc; |
||||||
|
typedef Byte FAR *voidpf; |
||||||
|
typedef Byte *voidp; |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ |
||||||
|
# define Z_HAVE_UNISTD_H |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef Z_HAVE_UNISTD_H |
||||||
|
# include <sys/types.h> /* for off_t */ |
||||||
|
# include <unistd.h> /* for SEEK_* and off_t */ |
||||||
|
# ifdef VMS |
||||||
|
# include <unixio.h> /* for off_t */ |
||||||
|
# endif |
||||||
|
# ifndef z_off_t |
||||||
|
# define z_off_t off_t |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef _LARGEFILE64_SOURCE |
||||||
|
# include <sys/types.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef SEEK_SET |
||||||
|
# define SEEK_SET 0 /* Seek from beginning of file. */ |
||||||
|
# define SEEK_CUR 1 /* Seek from current position. */ |
||||||
|
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ |
||||||
|
#endif |
||||||
|
#ifndef z_off_t |
||||||
|
# define z_off_t long |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__OS400__) |
||||||
|
# define NO_vsnprintf |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__MVS__) |
||||||
|
# define NO_vsnprintf |
||||||
|
#endif |
||||||
|
|
||||||
|
/* MVS linker does not support external names larger than 8 bytes */ |
||||||
|
#if defined(__MVS__) |
||||||
|
#pragma map(deflateInit_,"DEIN") |
||||||
|
#pragma map(deflateInit2_,"DEIN2") |
||||||
|
#pragma map(deflateEnd,"DEEND") |
||||||
|
#pragma map(deflateBound,"DEBND") |
||||||
|
#pragma map(inflateInit_,"ININ") |
||||||
|
#pragma map(inflateInit2_,"ININ2") |
||||||
|
#pragma map(inflateEnd,"INEND") |
||||||
|
#pragma map(inflateSync,"INSY") |
||||||
|
#pragma map(inflateSetDictionary,"INSEDI") |
||||||
|
#pragma map(compressBound,"CMBND") |
||||||
|
#pragma map(inflate_table,"INTABL") |
||||||
|
#pragma map(inflate_fast,"INFA") |
||||||
|
#pragma map(inflate_copyright,"INCOPY") |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* ZCONF_H */ |
@ -0,0 +1,418 @@ |
|||||||
|
/* zconf.h -- configuration of the zlib compression library |
||||||
|
* Copyright (C) 1995-2010 Jean-loup Gailly. |
||||||
|
* For conditions of distribution and use, see copyright notice in zlib.h |
||||||
|
*/ |
||||||
|
|
||||||
|
/* @(#) $Id$ */ |
||||||
|
|
||||||
|
#ifndef ZCONF_H |
||||||
|
#define ZCONF_H |
||||||
|
#cmakedefine Z_PREFIX |
||||||
|
#cmakedefine Z_HAVE_UNISTD_H |
||||||
|
|
||||||
|
/* |
||||||
|
* If you *really* need a unique prefix for all types and library functions, |
||||||
|
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. |
||||||
|
* Even better than compiling with -DZ_PREFIX would be to use configure to set |
||||||
|
* this permanently in zconf.h using "./configure --zprefix". |
||||||
|
*/ |
||||||
|
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ |
||||||
|
|
||||||
|
/* all linked symbols */ |
||||||
|
# define _dist_code z__dist_code |
||||||
|
# define _length_code z__length_code |
||||||
|
# define _tr_align z__tr_align |
||||||
|
# define _tr_flush_block z__tr_flush_block |
||||||
|
# define _tr_init z__tr_init |
||||||
|
# define _tr_stored_block z__tr_stored_block |
||||||
|
# define _tr_tally z__tr_tally |
||||||
|
# define adler32 z_adler32 |
||||||
|
# define adler32_combine z_adler32_combine |
||||||
|
# define adler32_combine64 z_adler32_combine64 |
||||||
|
# define compress z_compress |
||||||
|
# define compress2 z_compress2 |
||||||
|
# define compressBound z_compressBound |
||||||
|
# define crc32 z_crc32 |
||||||
|
# define crc32_combine z_crc32_combine |
||||||
|
# define crc32_combine64 z_crc32_combine64 |
||||||
|
# define deflate z_deflate |
||||||
|
# define deflateBound z_deflateBound |
||||||
|
# define deflateCopy z_deflateCopy |
||||||
|
# define deflateEnd z_deflateEnd |
||||||
|
# define deflateInit2_ z_deflateInit2_ |
||||||
|
# define deflateInit_ z_deflateInit_ |
||||||
|
# define deflateParams z_deflateParams |
||||||
|
# define deflatePrime z_deflatePrime |
||||||
|
# define deflateReset z_deflateReset |
||||||
|
# define deflateSetDictionary z_deflateSetDictionary |
||||||
|
# define deflateSetHeader z_deflateSetHeader |
||||||
|
# define deflateTune z_deflateTune |
||||||
|
# define deflate_copyright z_deflate_copyright |
||||||
|
# define get_crc_table z_get_crc_table |
||||||
|
# define gz_error z_gz_error |
||||||
|
# define gz_intmax z_gz_intmax |
||||||
|
# define gz_strwinerror z_gz_strwinerror |
||||||
|
# define gzbuffer z_gzbuffer |
||||||
|
# define gzclearerr z_gzclearerr |
||||||
|
# define gzclose z_gzclose |
||||||
|
# define gzclose_r z_gzclose_r |
||||||
|
# define gzclose_w z_gzclose_w |
||||||
|
# define gzdirect z_gzdirect |
||||||
|
# define gzdopen z_gzdopen |
||||||
|
# define gzeof z_gzeof |
||||||
|
# define gzerror z_gzerror |
||||||
|
# define gzflush z_gzflush |
||||||
|
# define gzgetc z_gzgetc |
||||||
|
# define gzgets z_gzgets |
||||||
|
# define gzoffset z_gzoffset |
||||||
|
# define gzoffset64 z_gzoffset64 |
||||||
|
# define gzopen z_gzopen |
||||||
|
# define gzopen64 z_gzopen64 |
||||||
|
# define gzprintf z_gzprintf |
||||||
|
# define gzputc z_gzputc |
||||||
|
# define gzputs z_gzputs |
||||||
|
# define gzread z_gzread |
||||||
|
# define gzrewind z_gzrewind |
||||||
|
# define gzseek z_gzseek |
||||||
|
# define gzseek64 z_gzseek64 |
||||||
|
# define gzsetparams z_gzsetparams |
||||||
|
# define gztell z_gztell |
||||||
|
# define gztell64 z_gztell64 |
||||||
|
# define gzungetc z_gzungetc |
||||||
|
# define gzwrite z_gzwrite |
||||||
|
# define inflate z_inflate |
||||||
|
# define inflateBack z_inflateBack |
||||||
|
# define inflateBackEnd z_inflateBackEnd |
||||||
|
# define inflateBackInit_ z_inflateBackInit_ |
||||||
|
# define inflateCopy z_inflateCopy |
||||||
|
# define inflateEnd z_inflateEnd |
||||||
|
# define inflateGetHeader z_inflateGetHeader |
||||||
|
# define inflateInit2_ z_inflateInit2_ |
||||||
|
# define inflateInit_ z_inflateInit_ |
||||||
|
# define inflateMark z_inflateMark |
||||||
|
# define inflatePrime z_inflatePrime |
||||||
|
# define inflateReset z_inflateReset |
||||||
|
# define inflateReset2 z_inflateReset2 |
||||||
|
# define inflateSetDictionary z_inflateSetDictionary |
||||||
|
# define inflateSync z_inflateSync |
||||||
|
# define inflateSyncPoint z_inflateSyncPoint |
||||||
|
# define inflateUndermine z_inflateUndermine |
||||||
|
# define inflate_copyright z_inflate_copyright |
||||||
|
# define inflate_fast z_inflate_fast |
||||||
|
# define inflate_table z_inflate_table |
||||||
|
# define uncompress z_uncompress |
||||||
|
# define zError z_zError |
||||||
|
# define zcalloc z_zcalloc |
||||||
|
# define zcfree z_zcfree |
||||||
|
# define zlibCompileFlags z_zlibCompileFlags |
||||||
|
# define zlibVersion z_zlibVersion |
||||||
|
|
||||||
|
/* all zlib typedefs in zlib.h and zconf.h */ |
||||||
|
# define Byte z_Byte |
||||||
|
# define Bytef z_Bytef |
||||||
|
# define alloc_func z_alloc_func |
||||||
|
# define charf z_charf |
||||||
|
# define free_func z_free_func |
||||||
|
# define gzFile z_gzFile |
||||||
|
# define gz_header z_gz_header |
||||||
|
# define gz_headerp z_gz_headerp |
||||||
|
# define in_func z_in_func |
||||||
|
# define intf z_intf |
||||||
|
# define out_func z_out_func |
||||||
|
# define uInt z_uInt |
||||||
|
# define uIntf z_uIntf |
||||||
|
# define uLong z_uLong |
||||||
|
# define uLongf z_uLongf |
||||||
|
# define voidp z_voidp |
||||||
|
# define voidpc z_voidpc |
||||||
|
# define voidpf z_voidpf |
||||||
|
|
||||||
|
/* all zlib structs in zlib.h and zconf.h */ |
||||||
|
# define gz_header_s z_gz_header_s |
||||||
|
# define internal_state z_internal_state |
||||||
|
|
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__MSDOS__) && !defined(MSDOS) |
||||||
|
# define MSDOS |
||||||
|
#endif |
||||||
|
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) |
||||||
|
# define OS2 |
||||||
|
#endif |
||||||
|
#if defined(_WINDOWS) && !defined(WINDOWS) |
||||||
|
# define WINDOWS |
||||||
|
#endif |
||||||
|
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) |
||||||
|
# ifndef WIN32 |
||||||
|
# define WIN32 |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) |
||||||
|
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) |
||||||
|
# ifndef SYS16BIT |
||||||
|
# define SYS16BIT |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* |
||||||
|
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more |
||||||
|
* than 64k bytes at a time (needed on systems with 16-bit int). |
||||||
|
*/ |
||||||
|
#ifdef SYS16BIT |
||||||
|
# define MAXSEG_64K |
||||||
|
#endif |
||||||
|
#ifdef MSDOS |
||||||
|
# define UNALIGNED_OK |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef __STDC_VERSION__ |
||||||
|
# ifndef STDC |
||||||
|
# define STDC |
||||||
|
# endif |
||||||
|
# if __STDC_VERSION__ >= 199901L |
||||||
|
# ifndef STDC99 |
||||||
|
# define STDC99 |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ |
||||||
|
# define STDC |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef STDC |
||||||
|
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ |
||||||
|
# define const /* note: need a more gentle solution here */ |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Some Mac compilers merge all .h files incorrectly: */ |
||||||
|
#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) |
||||||
|
# define NO_DUMMY_DECL |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Maximum value for memLevel in deflateInit2 */ |
||||||
|
#ifndef MAX_MEM_LEVEL |
||||||
|
# ifdef MAXSEG_64K |
||||||
|
# define MAX_MEM_LEVEL 8 |
||||||
|
# else |
||||||
|
# define MAX_MEM_LEVEL 9 |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* Maximum value for windowBits in deflateInit2 and inflateInit2. |
||||||
|
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files |
||||||
|
* created by gzip. (Files created by minigzip can still be extracted by |
||||||
|
* gzip.) |
||||||
|
*/ |
||||||
|
#ifndef MAX_WBITS |
||||||
|
# define MAX_WBITS 15 /* 32K LZ77 window */ |
||||||
|
#endif |
||||||
|
|
||||||
|
/* The memory requirements for deflate are (in bytes): |
||||||
|
(1 << (windowBits+2)) + (1 << (memLevel+9)) |
||||||
|
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) |
||||||
|
plus a few kilobytes for small objects. For example, if you want to reduce |
||||||
|
the default memory requirements from 256K to 128K, compile with |
||||||
|
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" |
||||||
|
Of course this will generally degrade compression (there's no free lunch). |
||||||
|
|
||||||
|
The memory requirements for inflate are (in bytes) 1 << windowBits |
||||||
|
that is, 32K for windowBits=15 (default value) plus a few kilobytes |
||||||
|
for small objects. |
||||||
|
*/ |
||||||
|
|
||||||
|
/* Type declarations */ |
||||||
|
|
||||||
|
#ifndef OF /* function prototypes */ |
||||||
|
# ifdef STDC |
||||||
|
# define OF(args) args |
||||||
|
# else |
||||||
|
# define OF(args) () |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
/* The following definitions for FAR are needed only for MSDOS mixed |
||||||
|
* model programming (small or medium model with some far allocations). |
||||||
|
* This was tested only with MSC; for other MSDOS compilers you may have |
||||||
|
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model, |
||||||
|
* just define FAR to be empty. |
||||||
|
*/ |
||||||
|
#ifdef SYS16BIT |
||||||
|
# if defined(M_I86SM) || defined(M_I86MM) |
||||||
|
/* MSC small or medium model */ |
||||||
|
# define SMALL_MEDIUM |
||||||
|
# ifdef _MSC_VER |
||||||
|
# define FAR _far |
||||||
|
# else |
||||||
|
# define FAR far |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
# if (defined(__SMALL__) || defined(__MEDIUM__)) |
||||||
|
/* Turbo C small or medium model */ |
||||||
|
# define SMALL_MEDIUM |
||||||
|
# ifdef __BORLANDC__ |
||||||
|
# define FAR _far |
||||||
|
# else |
||||||
|
# define FAR far |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(WINDOWS) || defined(WIN32) |
||||||
|
/* If building or using zlib as a DLL, define ZLIB_DLL. |
||||||
|
* This is not mandatory, but it offers a little performance increase. |
||||||
|
*/ |
||||||
|
# ifdef ZLIB_DLL |
||||||
|
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) |
||||||
|
# ifdef ZLIB_INTERNAL |
||||||
|
# define ZEXTERN extern __declspec(dllexport) |
||||||
|
# else |
||||||
|
# define ZEXTERN extern __declspec(dllimport) |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
# endif /* ZLIB_DLL */ |
||||||
|
/* If building or using zlib with the WINAPI/WINAPIV calling convention, |
||||||
|
* define ZLIB_WINAPI. |
||||||
|
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. |
||||||
|
*/ |
||||||
|
# ifdef ZLIB_WINAPI |
||||||
|
# ifdef FAR |
||||||
|
# undef FAR |
||||||
|
# endif |
||||||
|
# include <windows.h> |
||||||
|
/* No need for _export, use ZLIB.DEF instead. */ |
||||||
|
/* For complete Windows compatibility, use WINAPI, not __stdcall. */ |
||||||
|
# define ZEXPORT WINAPI |
||||||
|
# ifdef WIN32 |
||||||
|
# define ZEXPORTVA WINAPIV |
||||||
|
# else |
||||||
|
# define ZEXPORTVA FAR CDECL |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined (__BEOS__) |
||||||
|
# ifdef ZLIB_DLL |
||||||
|
# ifdef ZLIB_INTERNAL |
||||||
|
# define ZEXPORT __declspec(dllexport) |
||||||
|
# define ZEXPORTVA __declspec(dllexport) |
||||||
|
# else |
||||||
|
# define ZEXPORT __declspec(dllimport) |
||||||
|
# define ZEXPORTVA __declspec(dllimport) |
||||||
|
# endif |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef HAVE_VISIBILITY_PRAGMA |
||||||
|
# define ZEXTERN __attribute__((visibility ("default"))) extern |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef ZEXTERN |
||||||
|
# define ZEXTERN extern |
||||||
|
#endif |
||||||
|
#ifndef ZEXPORT |
||||||
|
# define ZEXPORT |
||||||
|
#endif |
||||||
|
#ifndef ZEXPORTVA |
||||||
|
# define ZEXPORTVA |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef FAR |
||||||
|
# define FAR |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !defined(__MACTYPES__) |
||||||
|
typedef unsigned char Byte; /* 8 bits */ |
||||||
|
#endif |
||||||
|
typedef unsigned int uInt; /* 16 bits or more */ |
||||||
|
typedef unsigned long uLong; /* 32 bits or more */ |
||||||
|
|
||||||
|
#ifdef SMALL_MEDIUM |
||||||
|
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ |
||||||
|
# define Bytef Byte FAR |
||||||
|
#else |
||||||
|
typedef Byte FAR Bytef; |
||||||
|
#endif |
||||||
|
typedef char FAR charf; |
||||||
|
typedef int FAR intf; |
||||||
|
typedef uInt FAR uIntf; |
||||||
|
typedef uLong FAR uLongf; |
||||||
|
|
||||||
|
#ifdef STDC |
||||||
|
typedef void const *voidpc; |
||||||
|
typedef void FAR *voidpf; |
||||||
|
typedef void *voidp; |
||||||
|
#else |
||||||
|
typedef Byte const *voidpc; |
||||||
|
typedef Byte FAR *voidpf; |
||||||
|
typedef Byte *voidp; |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ |
||||||
|
# define Z_HAVE_UNISTD_H |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef Z_HAVE_UNISTD_H |
||||||
|
# include <sys/types.h> /* for off_t */ |
||||||
|
# include <unistd.h> /* for SEEK_* and off_t */ |
||||||
|
# ifdef VMS |
||||||
|
# include <unixio.h> /* for off_t */ |
||||||
|
# endif |
||||||
|
# ifndef z_off_t |
||||||
|
# define z_off_t off_t |
||||||
|
# endif |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef _LARGEFILE64_SOURCE |
||||||
|
# include <sys/types.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef SEEK_SET |
||||||
|
# define SEEK_SET 0 /* Seek from beginning of file. */ |
||||||
|
# define SEEK_CUR 1 /* Seek from current position. */ |
||||||
|
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ |
||||||
|
#endif |
||||||
|
#ifndef z_off_t |
||||||
|
# define z_off_t long |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__OS400__) |
||||||
|
# define NO_vsnprintf |
||||||
|
#endif |
||||||
|
|
||||||
|
#if defined(__MVS__) |
||||||
|
# define NO_vsnprintf |
||||||
|
#endif |
||||||
|
|
||||||
|
/* MVS linker does not support external names larger than 8 bytes */ |
||||||
|
#if defined(__MVS__) |
||||||
|
#pragma map(deflateInit_,"DEIN") |
||||||
|
#pragma map(deflateInit2_,"DEIN2") |
||||||
|
#pragma map(deflateEnd,"DEEND") |
||||||
|
#pragma map(deflateBound,"DEBND") |
||||||
|
#pragma map(inflateInit_,"ININ") |
||||||
|
#pragma map(inflateInit2_,"ININ2") |
||||||
|
#pragma map(inflateEnd,"INEND") |
||||||
|
#pragma map(inflateSync,"INSY") |
||||||
|
#pragma map(inflateSetDictionary,"INSEDI") |
||||||
|
#pragma map(compressBound,"CMBND") |
||||||
|
#pragma map(inflate_table,"INTABL") |
||||||
|
#pragma map(inflate_fast,"INFA") |
||||||
|
#pragma map(inflate_copyright,"INCOPY") |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* ZCONF_H */ |
Binary file not shown.
Loading…
Reference in new issue