parent
7751bd4c71
commit
67cc20d004
67 changed files with 2822 additions and 3019 deletions
@ -1,2 +1,5 @@ |
||||
all: |
||||
-@echo "Use ./configure first. Thank you."
|
||||
|
||||
distclean: |
||||
make -f Makefile.in distclean
|
||||
|
@ -1,43 +0,0 @@ |
||||
This is a patched version of zlib modified to use |
||||
Pentium-optimized assembly code in the deflation algorithm. The files |
||||
changed/added by this patch are: |
||||
|
||||
README.586 |
||||
match.S |
||||
|
||||
The effectiveness of these modifications is a bit marginal, as the |
||||
program's bottleneck seems to be mostly L1-cache contention, which |
||||
there is no real way to work around without rewriting the basic |
||||
algorithm. The speedup on average is around 5-10% (which is generally |
||||
less than the amount of variance between subsequent executions). |
||||
However, when used at level 9 compression, the cache contention can |
||||
drop enough for the assembly version to achieve 10-20% speedup (and |
||||
sometimes more, depending on the amount of overall redundancy in the |
||||
files). Even here, though, cache contention can still be the limiting |
||||
factor, depending on the nature of the program using the zlib library. |
||||
This may also mean that better improvements will be seen on a Pentium |
||||
with MMX, which suffers much less from L1-cache contention, but I have |
||||
not yet verified this. |
||||
|
||||
Note that this code has been tailored for the Pentium in particular, |
||||
and will not perform well on the Pentium Pro (due to the use of a |
||||
partial register in the inner loop). |
||||
|
||||
If you are using an assembler other than GNU as, you will have to |
||||
translate match.S to use your assembler's syntax. (Have fun.) |
||||
|
||||
Brian Raiter |
||||
breadbox@muppetlabs.com |
||||
April, 1998 |
||||
|
||||
|
||||
Added for zlib 1.1.3: |
||||
|
||||
The patches come from |
||||
http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||
|
||||
To compile zlib with this asm file, copy match.S to the zlib directory |
||||
then do: |
||||
|
||||
CFLAGS="-O3 -DASMV" ./configure |
||||
make OBJA=match.o |
@ -1,364 +0,0 @@ |
||||
/* match.s -- Pentium-optimized version of longest_match() |
||||
* Written for zlib 1.1.2 |
||||
* Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
|
||||
* |
||||
* This is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License. |
||||
*/ |
||||
|
||||
#ifndef NO_UNDERLINE |
||||
#define match_init _match_init |
||||
#define longest_match _longest_match |
||||
#endif |
||||
|
||||
#define MAX_MATCH (258) |
||||
#define MIN_MATCH (3) |
||||
#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) |
||||
#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) |
||||
|
||||
/* stack frame offsets */ |
||||
|
||||
#define wmask 0 /* local copy of s->wmask */ |
||||
#define window 4 /* local copy of s->window */ |
||||
#define windowbestlen 8 /* s->window + bestlen */ |
||||
#define chainlenscanend 12 /* high word: current chain len */ |
||||
/* low word: last bytes sought */ |
||||
#define scanstart 16 /* first two bytes of string */ |
||||
#define scanalign 20 /* dword-misalignment of string */ |
||||
#define nicematch 24 /* a good enough match size */ |
||||
#define bestlen 28 /* size of best match so far */ |
||||
#define scan 32 /* ptr to string wanting match */ |
||||
|
||||
#define LocalVarsSize (36) |
||||
/* saved ebx 36 */ |
||||
/* saved edi 40 */ |
||||
/* saved esi 44 */ |
||||
/* saved ebp 48 */ |
||||
/* return address 52 */ |
||||
#define deflatestate 56 /* the function arguments */ |
||||
#define curmatch 60 |
||||
|
||||
/* Offsets for fields in the deflate_state structure. These numbers |
||||
* are calculated from the definition of deflate_state, with the |
||||
* assumption that the compiler will dword-align the fields. (Thus, |
||||
* changing the definition of deflate_state could easily cause this |
||||
* program to crash horribly, without so much as a warning at |
||||
* compile time. Sigh.) |
||||
*/ |
||||
|
||||
/* All the +zlib1222add offsets are due to the addition of fields |
||||
* in zlib in the deflate_state structure since the asm code was first written |
||||
* (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). |
||||
* (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). |
||||
* (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8"). |
||||
*/ |
||||
|
||||
#define zlib1222add (8) |
||||
|
||||
#define dsWSize (36+zlib1222add) |
||||
#define dsWMask (44+zlib1222add) |
||||
#define dsWindow (48+zlib1222add) |
||||
#define dsPrev (56+zlib1222add) |
||||
#define dsMatchLen (88+zlib1222add) |
||||
#define dsPrevMatch (92+zlib1222add) |
||||
#define dsStrStart (100+zlib1222add) |
||||
#define dsMatchStart (104+zlib1222add) |
||||
#define dsLookahead (108+zlib1222add) |
||||
#define dsPrevLen (112+zlib1222add) |
||||
#define dsMaxChainLen (116+zlib1222add) |
||||
#define dsGoodMatch (132+zlib1222add) |
||||
#define dsNiceMatch (136+zlib1222add) |
||||
|
||||
|
||||
.file "match.S" |
||||
|
||||
.globl match_init, longest_match |
||||
|
||||
.text |
||||
|
||||
/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ |
||||
|
||||
longest_match: |
||||
|
||||
/* Save registers that the compiler may be using, and adjust %esp to */ |
||||
/* make room for our stack frame. */ |
||||
|
||||
pushl %ebp |
||||
pushl %edi |
||||
pushl %esi |
||||
pushl %ebx |
||||
subl $LocalVarsSize, %esp |
||||
|
||||
/* Retrieve the function arguments. %ecx will hold cur_match */ |
||||
/* throughout the entire function. %edx will hold the pointer to the */ |
||||
/* deflate_state structure during the function's setup (before */ |
||||
/* entering the main loop). */ |
||||
|
||||
movl deflatestate(%esp), %edx |
||||
movl curmatch(%esp), %ecx |
||||
|
||||
/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ |
||||
|
||||
movl dsNiceMatch(%edx), %eax |
||||
movl dsLookahead(%edx), %ebx |
||||
cmpl %eax, %ebx |
||||
jl LookaheadLess |
||||
movl %eax, %ebx |
||||
LookaheadLess: movl %ebx, nicematch(%esp) |
||||
|
||||
/* register Bytef *scan = s->window + s->strstart; */ |
||||
|
||||
movl dsWindow(%edx), %esi |
||||
movl %esi, window(%esp) |
||||
movl dsStrStart(%edx), %ebp |
||||
lea (%esi,%ebp), %edi |
||||
movl %edi, scan(%esp) |
||||
|
||||
/* Determine how many bytes the scan ptr is off from being */ |
||||
/* dword-aligned. */ |
||||
|
||||
movl %edi, %eax |
||||
negl %eax |
||||
andl $3, %eax |
||||
movl %eax, scanalign(%esp) |
||||
|
||||
/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
||||
/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
||||
|
||||
movl dsWSize(%edx), %eax |
||||
subl $MIN_LOOKAHEAD, %eax |
||||
subl %eax, %ebp |
||||
jg LimitPositive |
||||
xorl %ebp, %ebp |
||||
LimitPositive: |
||||
|
||||
/* unsigned chain_length = s->max_chain_length; */ |
||||
/* if (s->prev_length >= s->good_match) { */ |
||||
/* chain_length >>= 2; */ |
||||
/* } */ |
||||
|
||||
movl dsPrevLen(%edx), %eax |
||||
movl dsGoodMatch(%edx), %ebx |
||||
cmpl %ebx, %eax |
||||
movl dsMaxChainLen(%edx), %ebx |
||||
jl LastMatchGood |
||||
shrl $2, %ebx |
||||
LastMatchGood: |
||||
|
||||
/* chainlen is decremented once beforehand so that the function can */ |
||||
/* use the sign flag instead of the zero flag for the exit test. */ |
||||
/* It is then shifted into the high word, to make room for the scanend */ |
||||
/* value, which it will always accompany. */ |
||||
|
||||
decl %ebx |
||||
shll $16, %ebx |
||||
|
||||
/* int best_len = s->prev_length; */ |
||||
|
||||
movl dsPrevLen(%edx), %eax |
||||
movl %eax, bestlen(%esp) |
||||
|
||||
/* Store the sum of s->window + best_len in windowbestlen locally, and in %esi. */ |
||||
|
||||
addl %eax, %esi |
||||
movl %esi, windowbestlen(%esp) |
||||
|
||||
/* register ush scan_start = *(ushf*)scan; */ |
||||
/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
||||
|
||||
movw (%edi), %bx |
||||
movw %bx, scanstart(%esp) |
||||
movw -1(%edi,%eax), %bx |
||||
movl %ebx, chainlenscanend(%esp) |
||||
|
||||
/* Posf *prev = s->prev; */ |
||||
/* uInt wmask = s->w_mask; */ |
||||
|
||||
movl dsPrev(%edx), %edi |
||||
movl dsWMask(%edx), %edx |
||||
mov %edx, wmask(%esp) |
||||
|
||||
/* Jump into the main loop. */ |
||||
|
||||
jmp LoopEntry |
||||
|
||||
.balign 16
|
||||
|
||||
/* do { |
||||
* match = s->window + cur_match;
|
||||
* if (*(ushf*)(match+best_len-1) != scan_end || |
||||
* *(ushf*)match != scan_start) continue;
|
||||
* [...] |
||||
* } while ((cur_match = prev[cur_match & wmask]) > limit |
||||
* && --chain_length != 0);
|
||||
* |
||||
* Here is the inner loop of the function. The function will spend the |
||||
* majority of its time in this loop, and the majority of that time will |
||||
* be spent in the first ten instructions. |
||||
* |
||||
* Within this loop: |
||||
* %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend) |
||||
* %ecx = curmatch |
||||
* %edx = curmatch & wmask |
||||
* %esi = windowbestlen - i.e., (window + bestlen) |
||||
* %edi = prev |
||||
* %ebp = limit |
||||
* |
||||
* Two optimization notes on the choice of instructions: |
||||
* |
||||
* The first instruction uses a 16-bit address, which costs an extra, |
||||
* unpairable cycle. This is cheaper than doing a 32-bit access and |
||||
* zeroing the high word, due to the 3-cycle misalignment penalty which |
||||
* would occur half the time. This also turns out to be cheaper than |
||||
* doing two separate 8-bit accesses, as the memory is so rarely in the |
||||
* L1 cache. |
||||
* |
||||
* The window buffer, however, apparently spends a lot of time in the |
||||
* cache, and so it is faster to retrieve the word at the end of the |
||||
* match string with two 8-bit loads. The instructions that test the |
||||
* word at the beginning of the match string, however, are executed |
||||
* much less frequently, and there it was cheaper to use 16-bit |
||||
* instructions, which avoided the necessity of saving off and |
||||
* subsequently reloading one of the other registers. |
||||
*/ |
||||
LookupLoop: |
||||
/* 1 U & V */ |
||||
movw (%edi,%edx,2), %cx /* 2 U pipe */ |
||||
movl wmask(%esp), %edx /* 2 V pipe */ |
||||
cmpl %ebp, %ecx /* 3 U pipe */ |
||||
jbe LeaveNow /* 3 V pipe */ |
||||
subl $0x00010000, %ebx /* 4 U pipe */ |
||||
js LeaveNow /* 4 V pipe */ |
||||
LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */ |
||||
andl %ecx, %edx /* 5 V pipe */ |
||||
cmpb %bl, %al /* 6 U pipe */ |
||||
jnz LookupLoop /* 6 V pipe */ |
||||
movb (%esi,%ecx), %ah |
||||
cmpb %bh, %ah |
||||
jnz LookupLoop |
||||
movl window(%esp), %eax |
||||
movw (%eax,%ecx), %ax |
||||
cmpw scanstart(%esp), %ax |
||||
jnz LookupLoop |
||||
|
||||
/* Store the current value of chainlen. */ |
||||
|
||||
movl %ebx, chainlenscanend(%esp) |
||||
|
||||
/* Point %edi to the string under scrutiny, and %esi to the string we */ |
||||
/* are hoping to match it up with. In actuality, %esi and %edi are */ |
||||
/* both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and %edx is */ |
||||
/* initialized to -MAX_MATCH_8. */ |
||||
|
||||
movl window(%esp), %esi |
||||
movl scan(%esp), %edi |
||||
addl %ecx, %esi |
||||
movl scanalign(%esp), %eax |
||||
movl $(-MAX_MATCH_8), %edx |
||||
lea MAX_MATCH_8(%edi,%eax), %edi |
||||
lea MAX_MATCH_8(%esi,%eax), %esi |
||||
|
||||
/* Test the strings for equality, 8 bytes at a time. At the end, |
||||
* adjust %edx so that it is offset to the exact byte that mismatched. |
||||
* |
||||
* We already know at this point that the first three bytes of the |
||||
* strings match each other, and they can be safely passed over before |
||||
* starting the compare loop. So what this code does is skip over 0-3 |
||||
* bytes, as much as necessary in order to dword-align the %edi |
||||
* pointer. (%esi will still be misaligned three times out of four.) |
||||
* |
||||
* It should be confessed that this loop usually does not represent |
||||
* much of the total running time. Replacing it with a more |
||||
* straightforward "rep cmpsb" would not drastically degrade |
||||
* performance. |
||||
*/ |
||||
LoopCmps: |
||||
movl (%esi,%edx), %eax |
||||
movl (%edi,%edx), %ebx |
||||
xorl %ebx, %eax |
||||
jnz LeaveLoopCmps |
||||
movl 4(%esi,%edx), %eax |
||||
movl 4(%edi,%edx), %ebx |
||||
xorl %ebx, %eax |
||||
jnz LeaveLoopCmps4 |
||||
addl $8, %edx |
||||
jnz LoopCmps |
||||
jmp LenMaximum |
||||
/* A mismatch was found. %eax holds the XOR of the two dwords that were */
/* compared (non-zero here). Entering at LeaveLoopCmps4 first adds 4 so */
/* that %edx is the offset of the dword that differed. */
LeaveLoopCmps4: addl $4, %edx |
||||
/* If the low 16 bits of the XOR are zero, the first differing byte is in */
/* the upper half: skip two bytes and bring that half down into %ax. */
LeaveLoopCmps: testl $0x0000FFFF, %eax |
||||
jnz LenLower |
||||
addl $2, %edx |
||||
shrl $16, %eax |
||||
/* subb sets the carry flag only when %al is zero, i.e. when the low byte */
/* of the pair matched and the difference is in the next byte; adcl then */
/* adds that carry so %edx is the offset of the first mismatching byte. */
LenLower: subb $1, %al |
||||
adcl $0, %edx |
||||
|
||||
/* Calculate the length of the match. If it is longer than MAX_MATCH, */ |
||||
/* then automatically accept it as the best possible match and leave. */ |
||||
|
||||
lea (%edi,%edx), %eax |
||||
movl scan(%esp), %edi |
||||
subl %edi, %eax |
||||
cmpl $MAX_MATCH, %eax |
||||
jge LenMaximum |
||||
|
||||
/* If the length of the match is not longer than the best match we */ |
||||
/* have so far, then forget it and return to the lookup loop. */ |
||||
|
||||
movl deflatestate(%esp), %edx |
||||
movl bestlen(%esp), %ebx |
||||
cmpl %ebx, %eax |
||||
jg LongerMatch |
||||
movl chainlenscanend(%esp), %ebx |
||||
movl windowbestlen(%esp), %esi |
||||
movl dsPrev(%edx), %edi |
||||
movl wmask(%esp), %edx |
||||
andl %ecx, %edx |
||||
jmp LookupLoop |
||||
|
||||
/* s->match_start = cur_match; */ |
||||
/* best_len = len; */ |
||||
/* if (len >= nice_match) break; */ |
||||
/* scan_end = *(ushf*)(scan+best_len-1); */ |
||||
|
||||
LongerMatch: movl nicematch(%esp), %ebx |
||||
movl %eax, bestlen(%esp) |
||||
movl %ecx, dsMatchStart(%edx) |
||||
cmpl %ebx, %eax |
||||
jge LeaveNow |
||||
movl window(%esp), %esi |
||||
addl %eax, %esi |
||||
movl %esi, windowbestlen(%esp) |
||||
movl chainlenscanend(%esp), %ebx |
||||
movw -1(%edi,%eax), %bx |
||||
movl dsPrev(%edx), %edi |
||||
movl %ebx, chainlenscanend(%esp) |
||||
movl wmask(%esp), %edx |
||||
andl %ecx, %edx |
||||
jmp LookupLoop |
||||
|
||||
/* Accept the current string, with the maximum possible length. */ |
||||
|
||||
LenMaximum: movl deflatestate(%esp), %edx |
||||
movl $MAX_MATCH, bestlen(%esp) |
||||
movl %ecx, dsMatchStart(%edx) |
||||
|
||||
/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
||||
/* return s->lookahead; */ |
||||
|
||||
LeaveNow: |
||||
movl deflatestate(%esp), %edx |
||||
movl bestlen(%esp), %ebx |
||||
movl dsLookahead(%edx), %eax |
||||
cmpl %eax, %ebx |
||||
jg LookaheadRet |
||||
movl %ebx, %eax |
||||
LookaheadRet: |
||||
|
||||
/* Restore the stack and return from whence we came. */ |
||||
|
||||
addl $LocalVarsSize, %esp |
||||
popl %ebx |
||||
popl %esi |
||||
popl %edi |
||||
popl %ebp |
||||
match_init: ret |
@ -0,0 +1,574 @@ |
||||
/* |
||||
;uInt longest_match_x64(
|
||||
; deflate_state *s,
|
||||
; IPos cur_match); // current match
|
||||
|
||||
; gvmat64.S -- Asm portion of the optimized longest_match for 32 bits x86_64
|
||||
; (AMD64 on Athlon 64, Opteron, Phenom
|
||||
; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7)
|
||||
; this file is a translation of gvmat64.asm to GCC 4.x (for Linux, Mac XCode)
|
||||
; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
|
||||
;
|
||||
; File written by Gilles Vollant, by converting to assembly the longest_match
|
||||
; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
|
||||
; and by taking inspiration from asm686 with masm, optimised assembly code
|
||||
; from Brian Raiter, written 1998
|
||||
;
|
||||
; This software is provided 'as-is', without any express or implied
|
||||
; warranty. In no event will the authors be held liable for any damages
|
||||
; arising from the use of this software.
|
||||
;
|
||||
; Permission is granted to anyone to use this software for any purpose,
|
||||
; including commercial applications, and to alter it and redistribute it
|
||||
; freely, subject to the following restrictions:
|
||||
;
|
||||
; 1. The origin of this software must not be misrepresented; you must not
|
||||
; claim that you wrote the original software. If you use this software
|
||||
; in a product, an acknowledgment in the product documentation would be
|
||||
; appreciated but is not required.
|
||||
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||
; misrepresented as being the original software
|
||||
; 3. This notice may not be removed or altered from any source distribution.
|
||||
;
|
||||
; http://www.zlib.net
|
||||
; http://www.winimage.com/zLibDll
|
||||
; http://www.muppetlabs.com/~breadbox/software/assembly.html
|
||||
;
|
||||
; to compile this file for zLib, I use option:
|
||||
; gcc -c -arch x86_64 gvmat64.S
|
||||
|
||||
|
||||
;uInt longest_match(s, cur_match)
|
||||
; deflate_state *s;
|
||||
; IPos cur_match; // current match /
|
||||
;
|
||||
; with XCode for Mac, I had strange errors with some jumps in Intel syntax
|
||||
; this is why BEFORE_JMP and AFTER_JMP are used
|
||||
*/ |
||||
|
||||
|
||||
#define BEFORE_JMP .att_syntax |
||||
#define AFTER_JMP .intel_syntax noprefix |
||||
|
||||
#ifndef NO_UNDERLINE |
||||
# define match_init _match_init |
||||
# define longest_match _longest_match |
||||
#endif |
||||
|
||||
.intel_syntax noprefix
|
||||
|
||||
.globl match_init, longest_match |
||||
.text |
||||
longest_match: |
||||
|
||||
|
||||
|
||||
#define LocalVarsSize 96 |
||||
/* |
||||
; register used : rax,rbx,rcx,rdx,rsi,rdi,rbp,r8,r9,r10,r11,r12,r13
|
||||
; free register : r14,r15
|
||||
; register can be saved : rsp
|
||||
*/ |
||||
|
||||
#define chainlenwmask (rsp + 8 - LocalVarsSize) |
||||
#define nicematch (rsp + 16 - LocalVarsSize) |
||||
|
||||
#define save_rdi (rsp + 24 - LocalVarsSize) |
||||
#define save_rsi (rsp + 32 - LocalVarsSize) |
||||
#define save_rbx (rsp + 40 - LocalVarsSize) |
||||
#define save_rbp (rsp + 48 - LocalVarsSize) |
||||
#define save_r12 (rsp + 56 - LocalVarsSize) |
||||
#define save_r13 (rsp + 64 - LocalVarsSize) |
||||
#define save_r14 (rsp + 72 - LocalVarsSize) |
||||
#define save_r15 (rsp + 80 - LocalVarsSize) |
||||
|
||||
|
||||
/* |
||||
; all the +4 offsets are due to the addition of pending_buf_size in zlib
|
||||
; in the deflate_state structure since the asm code was first written
|
||||
; (if you compile with zlib 1.0.4 or older, remove the +4).
|
||||
; Note : these value are good with a 8 bytes boundary pack structure
|
||||
*/ |
||||
|
||||
#define MAX_MATCH 258 |
||||
#define MIN_MATCH 3 |
||||
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) |
||||
|
||||
/* |
||||
;;; Offsets for fields in the deflate_state structure. These numbers
|
||||
;;; are calculated from the definition of deflate_state, with the
|
||||
;;; assumption that the compiler will dword-align the fields. (Thus,
|
||||
;;; changing the definition of deflate_state could easily cause this
|
||||
;;; program to crash horribly, without so much as a warning at
|
||||
;;; compile time. Sigh.)
|
||||
|
||||
; all the +zlib1222add offsets are due to the addition of fields
|
||||
; in zlib in the deflate_state structure since the asm code was first written
|
||||
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
|
||||
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
|
||||
; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
|
||||
*/ |
||||
|
||||
|
||||
|
||||
/* you can check the structure offset by running |
||||
|
||||
#include <stdlib.h> |
||||
#include <stdio.h> |
||||
#include "deflate.h" |
||||
|
||||
void print_depl() |
||||
{ |
||||
deflate_state ds;
|
||||
deflate_state *s=&ds;
|
||||
printf("size pointer=%u\n",(int)sizeof(void*));
|
||||
|
||||
printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s))); |
||||
printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s))); |
||||
printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s))); |
||||
printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s))); |
||||
printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s))); |
||||
printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s))); |
||||
printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s))); |
||||
printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s))); |
||||
printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s))); |
||||
printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s))); |
||||
printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); |
||||
printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s))); |
||||
printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s))); |
||||
} |
||||
*/ |
||||
|
||||
#define dsWSize 68 |
||||
#define dsWMask 76 |
||||
#define dsWindow 80 |
||||
#define dsPrev 96 |
||||
#define dsMatchLen 144 |
||||
#define dsPrevMatch 148 |
||||
#define dsStrStart 156 |
||||
#define dsMatchStart 160 |
||||
#define dsLookahead 164 |
||||
#define dsPrevLen 168 |
||||
#define dsMaxChainLen 172 |
||||
#define dsGoodMatch 188 |
||||
#define dsNiceMatch 192 |
||||
|
||||
#define window_size [ rcx + dsWSize] |
||||
#define WMask [ rcx + dsWMask] |
||||
#define window_ad [ rcx + dsWindow] |
||||
#define prev_ad [ rcx + dsPrev] |
||||
#define strstart [ rcx + dsStrStart] |
||||
#define match_start [ rcx + dsMatchStart] |
||||
#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip
|
||||
#define prev_length [ rcx + dsPrevLen] |
||||
#define max_chain_length [ rcx + dsMaxChainLen] |
||||
#define good_match [ rcx + dsGoodMatch] |
||||
#define nice_match [ rcx + dsNiceMatch] |
||||
|
||||
/* |
||||
; windows:
|
||||
; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match)
|
||||
|
||||
; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
|
||||
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
|
||||
;
|
||||
; All registers must be preserved across the call, except for
|
||||
; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
|
||||
|
||||
;
|
||||
; gcc on macosx-linux:
|
||||
; see http://www.x86-64.org/documentation/abi-0.99.pdf
|
||||
; param 1 in rdi, param 2 in rsi
|
||||
; rbx, rsp, rbp, r12 to r15 must be preserved
|
||||
|
||||
;;; Save registers that the compiler may be using, and adjust esp to
|
||||
;;; make room for our stack frame.
|
||||
|
||||
|
||||
;;; Retrieve the function arguments. r8d will hold cur_match
|
||||
;;; throughout the entire function. rcx will hold the pointer to the
|
||||
;;; deflate_state structure during the function's setup (before
|
||||
;;; entering the main loop).
|
||||
|
||||
; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)
|
||||
; mac: param 1 in rdi, param 2 rsi
|
||||
; this clears the high 32 bits of r8, which can be garbage in both r8 and rdx
|
||||
*/ |
||||
//;;; Prologue. Each save_* slot is (rsp + k - LocalVarsSize) with
//;;; k < LocalVarsSize, so every slot lies below rsp; rsp itself is not
//;;; adjusted anywhere in the visible body of this function.
//;;; NOTE(review): this relies on the SysV red zone (128 bytes below rsp),
//;;; which is only safe while the function stays a leaf - confirm before
//;;; adding any call or push.
mov [save_rbx],rbx |
||||
mov [save_rbp],rbp |
||||
|
||||
|
||||
//;;; rcx = s (first argument, arriving in rdi). All the field macros
//;;; (window_ad, prev_ad, Lookahead, ...) address memory as [rcx + dsXxx].
mov rcx,rdi |
||||
|
||||
//;;; r8d = cur_match (second argument, arriving in esi). A 32-bit move
//;;; zero-extends, so the upper half of r8 is cleared here.
mov r8d,esi |
||||
|
||||
|
||||
//;;; r12 and r13 are overwritten during setup. r14 and r15 are saved and
//;;; restored as well, although no other instruction in the visible body
//;;; touches them.
mov [save_r12],r12 |
||||
mov [save_r13],r13 |
||||
mov [save_r14],r14 |
||||
mov [save_r15],r15 |
||||
|
||||
|
||||
//;;; uInt wmask = s->w_mask;
|
||||
//;;; unsigned chain_length = s->max_chain_length;
|
||||
//;;; if (s->prev_length >= s->good_match) {
|
||||
//;;; chain_length >>= 2;
|
||||
//;;; }
|
||||
|
||||
|
||||
mov edi, prev_length |
||||
mov esi, good_match |
||||
mov eax, WMask |
||||
mov ebx, max_chain_length |
||||
cmp edi, esi |
||||
jl LastMatchGood |
||||
shr ebx, 2 |
||||
LastMatchGood: |
||||
|
||||
//;;; chainlen is decremented once beforehand so that the function can
|
||||
//;;; use the sign flag instead of the zero flag for the exit test.
|
||||
//;;; It is then shifted into the high word, to make room for the wmask
|
||||
//;;; value, which it will always accompany.
|
||||
|
||||
dec ebx |
||||
shl ebx, 16 |
||||
or ebx, eax |
||||
|
||||
//;;; on zlib only
|
||||
//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
|
||||
|
||||
|
||||
|
||||
mov eax, nice_match |
||||
mov [chainlenwmask], ebx |
||||
mov r10d, Lookahead |
||||
cmp r10d, eax |
||||
cmovnl r10d, eax |
||||
mov [nicematch],r10d |
||||
|
||||
|
||||
|
||||
//;;; register Bytef *scan = s->window + s->strstart;
|
||||
mov r10, window_ad |
||||
mov ebp, strstart |
||||
lea r13, [r10 + rbp] |
||||
|
||||
//;;; Determine how many bytes the scan ptr is off from being
|
||||
//;;; dword-aligned.
|
||||
|
||||
mov r9,r13 |
||||
neg r13 |
||||
and r13,3 |
||||
|
||||
//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
|
||||
//;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
|
||||
|
||||
|
||||
mov eax, window_size |
||||
sub eax, MIN_LOOKAHEAD |
||||
|
||||
|
||||
xor edi,edi |
||||
sub ebp, eax |
||||
|
||||
mov r11d, prev_length |
||||
|
||||
cmovng ebp,edi |
||||
|
||||
//;;; int best_len = s->prev_length;
|
||||
|
||||
|
||||
//;;; Store the sum of s->window + best_len in rsi.
|
||||
|
||||
lea rsi,[r10+r11] |
||||
|
||||
//;;; register ush scan_start = *(ushf*)scan;
|
||||
//;;; register ush scan_end = *(ushf*)(scan+best_len-1);
|
||||
//;;; Posf *prev = s->prev;
|
||||
|
||||
movzx r12d,word ptr [r9] |
||||
movzx ebx, word ptr [r9 + r11 - 1] |
||||
|
||||
mov rdi, prev_ad |
||||
|
||||
//;;; Jump into the main loop.
|
||||
|
||||
mov edx, [chainlenwmask] |
||||
|
||||
cmp bx,word ptr [rsi + r8 - 1] |
||||
jz LookupLoopIsZero |
||||
|
||||
|
||||
|
||||
LookupLoop1: |
||||
and r8d, edx |
||||
|
||||
movzx r8d, word ptr [rdi + r8*2] |
||||
cmp r8d, ebp |
||||
jbe LeaveNow |
||||
|
||||
|
||||
|
||||
sub edx, 0x00010000 |
||||
BEFORE_JMP |
||||
js LeaveNow |
||||
AFTER_JMP |
||||
|
||||
LoopEntry1: |
||||
cmp bx,word ptr [rsi + r8 - 1] |
||||
BEFORE_JMP |
||||
jz LookupLoopIsZero |
||||
AFTER_JMP |
||||
|
||||
LookupLoop2: |
||||
and r8d, edx |
||||
|
||||
movzx r8d, word ptr [rdi + r8*2] |
||||
cmp r8d, ebp |
||||
BEFORE_JMP |
||||
jbe LeaveNow |
||||
AFTER_JMP |
||||
sub edx, 0x00010000 |
||||
BEFORE_JMP |
||||
js LeaveNow |
||||
AFTER_JMP |
||||
|
||||
LoopEntry2: |
||||
cmp bx,word ptr [rsi + r8 - 1] |
||||
BEFORE_JMP |
||||
jz LookupLoopIsZero |
||||
AFTER_JMP |
||||
|
||||
LookupLoop4: |
||||
and r8d, edx |
||||
|
||||
movzx r8d, word ptr [rdi + r8*2] |
||||
cmp r8d, ebp |
||||
BEFORE_JMP |
||||
jbe LeaveNow |
||||
AFTER_JMP |
||||
sub edx, 0x00010000 |
||||
BEFORE_JMP |
||||
js LeaveNow |
||||
AFTER_JMP |
||||
|
||||
LoopEntry4: |
||||
|
||||
cmp bx,word ptr [rsi + r8 - 1] |
||||
BEFORE_JMP |
||||
jnz LookupLoop1 |
||||
jmp LookupLoopIsZero |
||||
AFTER_JMP |
||||
/* |
||||
;;; do {
|
||||
;;; match = s->window + cur_match;
|
||||
;;; if (*(ushf*)(match+best_len-1) != scan_end ||
|
||||
;;; *(ushf*)match != scan_start) continue;
|
||||
;;; [...]
|
||||
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
|
||||
;;; && --chain_length != 0);
|
||||
;;;
|
||||
;;; Here is the inner loop of the function. The function will spend the
|
||||
;;; majority of its time in this loop, and majority of that time will
|
||||
;;; be spent in the first ten instructions.
|
||||
;;;
|
||||
;;; Within this loop:
|
||||
;;; ebx = scanend
|
||||
;;; r8d = curmatch
|
||||
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
|
||||
;;; rsi = windowbestlen - i.e., (window + bestlen)
|
||||
;;; rdi = prev
|
||||
;;; ebp = limit
|
||||
*/ |
||||
.balign 16
|
||||
LookupLoop: |
||||
and r8d, edx |
||||
|
||||
movzx r8d, word ptr [rdi + r8*2] |
||||
cmp r8d, ebp |
||||
BEFORE_JMP |
||||
jbe LeaveNow |
||||
AFTER_JMP |
||||
sub edx, 0x00010000 |
||||
BEFORE_JMP |
||||
js LeaveNow |
||||
AFTER_JMP |
||||
|
||||
LoopEntry: |
||||
|
||||
cmp bx,word ptr [rsi + r8 - 1] |
||||
BEFORE_JMP |
||||
jnz LookupLoop1 |
||||
AFTER_JMP |
||||
LookupLoopIsZero: |
||||
cmp r12w, word ptr [r10 + r8] |
||||
BEFORE_JMP |
||||
jnz LookupLoop1 |
||||
AFTER_JMP |
||||
|
||||
|
||||
//;;; Store the current value of chainlen.
|
||||
mov [chainlenwmask], edx |
||||
/* |
||||
;;; Point edi to the string under scrutiny, and esi to the string we
|
||||
;;; are hoping to match it up with. In actuality, esi and edi are
|
||||
;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and rdx is
|
||||
;;; initialized to -MAX_MATCH_8.
|
||||
*/ |
||||
lea rsi,[r8+r10] |
||||
mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8)
|
||||
lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8]
|
||||
lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8]
|
||||
|
||||
prefetcht1 [rsi+rdx] |
||||
prefetcht1 [rdi+rdx] |
||||
|
||||
/* |
||||
;;; Test the strings for equality, 8 bytes at a time. At the end,
|
||||
;;; adjust rdx so that it is offset to the exact byte that mismatched.
|
||||
;;;
|
||||
;;; We already know at this point that the first three bytes of the
|
||||
;;; strings match each other, and they can be safely passed over before
|
||||
;;; starting the compare loop. So what this code does is skip over 0-3
|
||||
;;; bytes, as much as necessary in order to dword-align the edi
|
||||
;;; pointer. (rsi will still be misaligned three times out of four.)
|
||||
;;;
|
||||
;;; It should be confessed that this loop usually does not represent
|
||||
;;; much of the total running time. Replacing it with a more
|
||||
;;; straightforward "rep cmpsb" would not drastically degrade
|
||||
;;; performance.
|
||||
*/ |
||||
|
||||
LoopCmps: |
||||
mov rax, [rsi + rdx] |
||||
xor rax, [rdi + rdx] |
||||
jnz LeaveLoopCmps |
||||
|
||||
mov rax, [rsi + rdx + 8] |
||||
xor rax, [rdi + rdx + 8] |
||||
jnz LeaveLoopCmps8 |
||||
|
||||
|
||||
mov rax, [rsi + rdx + 8+8] |
||||
xor rax, [rdi + rdx + 8+8] |
||||
jnz LeaveLoopCmps16 |
||||
|
||||
add rdx,8+8+8 |
||||
|
||||
BEFORE_JMP |
||||
jnz LoopCmps |
||||
jmp LenMaximum |
||||
AFTER_JMP |
||||
|
||||
//;;; A mismatch was found. rax holds the XOR of the two qwords that were
//;;; compared (non-zero here). The fall-through "add rdx,8" steps bring rdx
//;;; to the offset of the qword that differed; the code below then advances
//;;; rdx to the first differing byte, narrowing 8 -> 4 -> 2 -> 1 bytes.
LeaveLoopCmps16: add rdx,8 |
||||
LeaveLoopCmps8: add rdx,8 |
||||
LeaveLoopCmps: |
||||
|
||||
//;;; Difference within bytes 0-1: go straight to the final byte test.
test eax, 0x0000FFFF |
||||
jnz LenLower |
||||
|
||||
//;;; Low word equal but low dword not: the difference is in bytes 2-3.
test eax,0xffffffff |
||||
|
||||
jnz LenLower32 |
||||
|
||||
//;;; Low dword equal: skip 4 bytes and bring the high dword down into eax.
//;;; "or ax,ax" only sets the flags: non-zero means the difference is in
//;;; bytes 4-5, otherwise fall through to handle bytes 6-7.
add rdx,4 |
||||
shr rax,32 |
||||
or ax,ax |
||||
BEFORE_JMP |
||||
jnz LenLower |
||||
AFTER_JMP |
||||
|
||||
//;;; The difference is in the upper word of eax: skip 2 bytes and bring
//;;; that word down into ax.
LenLower32: |
||||
shr eax,16 |
||||
add rdx,2 |
||||
|
||||
//;;; "sub al,1" sets the carry flag only when al is zero, i.e. when the low
//;;; byte of the pair matched and the difference is in the next byte; adc
//;;; adds that carry so rdx is the offset of the first mismatching byte.
LenLower:
|
||||
sub al, 1 |
||||
adc rdx, 0 |
||||
//;;; Calculate the length of the match. If it is longer than MAX_MATCH,
|
||||
//;;; then automatically accept it as the best possible match and leave.
|
||||
|
||||
lea rax, [rdi + rdx] |
||||
sub rax, r9 |
||||
cmp eax, MAX_MATCH |
||||
BEFORE_JMP |
||||
jge LenMaximum |
||||
AFTER_JMP |
||||
/* |
||||
;;; If the length of the match is not longer than the best match we
|
||||
;;; have so far, then forget it and return to the lookup loop.
|
||||
;///////////////////////////////////
|
||||
*/ |
||||
cmp eax, r11d |
||||
jg LongerMatch |
||||
|
||||
lea rsi,[r10+r11] |
||||
|
||||
mov rdi, prev_ad |
||||
mov edx, [chainlenwmask] |
||||
BEFORE_JMP |
||||
jmp LookupLoop |
||||
AFTER_JMP |
||||
/* |
||||
;;; s->match_start = cur_match;
|
||||
;;; best_len = len;
|
||||
;;; if (len >= nice_match) break;
|
||||
;;; scan_end = *(ushf*)(scan+best_len-1);
|
||||
*/ |
||||
LongerMatch: |
||||
mov r11d, eax |
||||
mov match_start, r8d |
||||
cmp eax, [nicematch] |
||||
BEFORE_JMP |
||||
jge LeaveNow |
||||
AFTER_JMP |
||||
|
||||
lea rsi,[r10+rax] |
||||
|
||||
movzx ebx, word ptr [r9 + rax - 1] |
||||
mov rdi, prev_ad |
||||
mov edx, [chainlenwmask] |
||||
BEFORE_JMP |
||||
jmp LookupLoop |
||||
AFTER_JMP |
||||
|
||||
//;;; Accept the current string, with the maximum possible length.
|
||||
|
||||
LenMaximum: |
||||
mov r11d,MAX_MATCH |
||||
mov match_start, r8d |
||||
|
||||
//;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
|
||||
//;;; return s->lookahead;
|
||||
|
||||
LeaveNow: |
||||
mov eax, Lookahead |
||||
cmp r11d, eax |
||||
cmovng eax, r11d |
||||
|
||||
|
||||
|
||||
//;;; Restore the stack and return from whence we came.
|
||||
|
||||
|
||||
// mov rsi,[save_rsi] |
||||
// mov rdi,[save_rdi] |
||||
mov rbx,[save_rbx] |
||||
mov rbp,[save_rbp] |
||||
mov r12,[save_r12] |
||||
mov r13,[save_r13] |
||||
mov r14,[save_r14] |
||||
mov r15,[save_r15] |
||||
|
||||
|
||||
ret 0 |
||||
//; please don't remove this string !
|
||||
//; You can freely use gvmat64 in any free or commercial app
|
||||
//; but it is far better not to remove the string from the binary!
|
||||
// db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 |
||||
|
||||
|
||||
//;;; match_init: empty entry point. It performs no work and returns
//;;; immediately with "ret 0" (no stack bytes are released).
match_init: |
||||
ret 0 |
||||
|
||||
|
@ -1,413 +0,0 @@ |
||||
|
||||
; match.asm -- Pentium-Pro optimized version of longest_match() |
||||
; |
||||
; Updated for zlib 1.1.3 and converted to MASM 6.1x |
||||
; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> |
||||
; and Chuck Walbourn <chuckw@kinesoft.com> |
||||
; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> |
||||
; |
||||
; This is free software; you can redistribute it and/or modify it |
||||
; under the terms of the GNU General Public License. |
||||
|
||||
; Based on match.S |
||||
; Written for zlib 1.1.2 |
||||
; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
||||
; |
||||
; Modified by Gilles Vollant (2005) to add gzhead and gzindex |
||||
|
||||
.686P |
||||
.MODEL FLAT |
||||
|
||||
;=========================================================================== |
||||
; EQUATES |
||||
;=========================================================================== |
||||
|
||||
MAX_MATCH EQU 258 |
||||
MIN_MATCH EQU 3 |
||||
MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) |
||||
MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) |
||||
|
||||
;=========================================================================== |
||||
; STRUCTURES |
||||
;=========================================================================== |
||||
|
||||
; This STRUCT assumes a 4-byte alignment |
||||
|
||||
; DEFLATE_STATE describes the leading fields of the deflate_state structure
; that is passed to longest_match, so that they can be addressed by name
; through "ASSUME edx:PTR DEFLATE_STATE". Every field is a dword except
; ds_data_type and ds_method, which are bytes followed by two padding bytes.
; A trailing "; used" marks a field that the code in this file accesses.
DEFLATE_STATE STRUCT |
||||
ds_strm dd ? |
||||
ds_status dd ? |
||||
ds_pending_buf dd ? |
||||
ds_pending_buf_size dd ? |
||||
ds_pending_out dd ? |
||||
ds_pending dd ? |
||||
ds_wrap dd ? |
||||
; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) |
||||
ds_gzhead dd ? |
||||
ds_gzindex dd ? |
||||
ds_data_type db ? |
||||
ds_method db ? |
||||
db ? ; padding |
||||
db ? ; padding |
||||
ds_last_flush dd ? |
||||
ds_w_size dd ? ; used |
||||
ds_w_bits dd ? |
||||
ds_w_mask dd ? ; used |
||||
ds_window dd ? ; used |
||||
ds_window_size dd ? |
||||
ds_prev dd ? ; used |
||||
ds_head dd ? |
||||
ds_ins_h dd ? |
||||
ds_hash_size dd ? |
||||
ds_hash_bits dd ? |
||||
ds_hash_mask dd ? |
||||
ds_hash_shift dd ? |
||||
ds_block_start dd ? |
||||
ds_match_length dd ? ; marked used, but not referenced by the code in this file |
||||
ds_prev_match dd ? ; marked used, but not referenced by the code in this file |
||||
ds_match_available dd ? |
||||
ds_strstart dd ? ; used |
||||
ds_match_start dd ? ; used |
||||
ds_lookahead dd ? ; used |
||||
ds_prev_length dd ? ; used |
||||
ds_max_chain_length dd ? ; used |
||||
; NOTE(review): "laxy" looks like a typo for "lazy" (max_lazy_match) - confirm
; against deflate.h; the name is not referenced by the code in this file.
ds_max_laxy_match dd ? |
||||
ds_level dd ? |
||||
ds_strategy dd ? |
||||
ds_good_match dd ? ; used |
||||
ds_nice_match dd ? ; used |
||||
|
||||
; No further fields of the struct are needed by the match code |
||||
DEFLATE_STATE ENDS |
||||
|
||||
;=========================================================================== |
||||
; CODE |
||||
;=========================================================================== |
||||
_TEXT SEGMENT |
||||
|
||||
;--------------------------------------------------------------------------- |
||||
; match_init |
||||
;--------------------------------------------------------------------------- |
||||
; _match_init: public, 4-byte aligned entry point that does nothing and
; returns immediately. It reads no arguments and modifies no registers.
; NOTE(review): presumably called as match_init() from the C side - confirm
; against the caller.
ALIGN 4 |
||||
PUBLIC _match_init |
||||
_match_init PROC |
||||
; no initialization needed |
||||
ret |
||||
_match_init ENDP |
||||
|
||||
;--------------------------------------------------------------------------- |
||||
; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
||||
;--------------------------------------------------------------------------- |
||||
ALIGN 4 |
||||
|
||||
PUBLIC _longest_match |
||||
_longest_match PROC |
||||
|
||||
; Since this code uses EBP for a scratch register, the stack frame must |
||||
; be manually constructed and referenced relative to the ESP register. |
||||
|
||||
; Stack image |
||||
; Variables |
||||
chainlenwmask = 0 ; high word: current chain len |
||||
; low word: s->wmask |
||||
window = 4 ; local copy of s->window |
||||
windowbestlen = 8 ; s->window + bestlen |
||||
scanend = 12 ; last two bytes of string |
||||
scanstart = 16 ; first two bytes of string |
||||
scanalign = 20 ; dword-misalignment of string |
||||
nicematch = 24 ; a good enough match size |
||||
bestlen = 28 ; size of best match so far |
||||
scan = 32 ; ptr to string wanting match |
||||
varsize = 36 ; number of bytes (also offset to last saved register) |
||||
|
||||
; Saved Registers (actually pushed into place) |
||||
ebx_save = 36 |
||||
edi_save = 40 |
||||
esi_save = 44 |
||||
ebp_save = 48 |
||||
|
||||
; Parameters |
||||
retaddr = 52 |
||||
deflatestate = 56 |
||||
curmatch = 60 |
||||
|
||||
; Save registers that the compiler may be using |
||||
push ebp |
||||
push edi |
||||
push esi |
||||
push ebx |
||||
|
||||
; Allocate local variable space |
||||
sub esp,varsize |
||||
|
||||
; Retrieve the function arguments. ecx will hold cur_match |
||||
; throughout the entire function. edx will hold the pointer to the |
||||
; deflate_state structure during the function's setup (before |
||||
; entering the main loop). |
||||
|
||||
mov edx, [esp+deflatestate] |
||||
ASSUME edx:PTR DEFLATE_STATE |
||||
|
||||
mov ecx, [esp+curmatch] |
||||
|
||||
; uInt wmask = s->w_mask; |
||||
; unsigned chain_length = s->max_chain_length; |
||||
; if (s->prev_length >= s->good_match) { |
||||
; chain_length >>= 2; |
||||
; } |
||||
|
||||
mov eax, [edx].ds_prev_length |
||||
mov ebx, [edx].ds_good_match |
||||
cmp eax, ebx |
||||
mov eax, [edx].ds_w_mask |
||||
mov ebx, [edx].ds_max_chain_length |
||||
jl SHORT LastMatchGood |
||||
shr ebx, 2 |
||||
LastMatchGood: |
||||
|
||||
; chainlen is decremented once beforehand so that the function can |
||||
; use the sign flag instead of the zero flag for the exit test. |
||||
; It is then shifted into the high word, to make room for the wmask |
||||
; value, which it will always accompany. |
||||
|
||||
dec ebx |
||||
shl ebx, 16 |
||||
or ebx, eax |
||||
mov [esp+chainlenwmask], ebx |
||||
|
||||
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
||||
|
||||
mov eax, [edx].ds_nice_match |
||||
mov ebx, [edx].ds_lookahead |
||||
cmp ebx, eax |
||||
jl SHORT LookaheadLess |
||||
mov ebx, eax |
||||
LookaheadLess: |
||||
mov [esp+nicematch], ebx |
||||
|
||||
;/* register Bytef *scan = s->window + s->strstart; */ |
||||
|
||||
mov esi, [edx].ds_window |
||||
mov [esp+window], esi |
||||
mov ebp, [edx].ds_strstart |
||||
lea edi, [esi+ebp] |
||||
mov [esp+scan],edi |
||||
|
||||
;/* Determine how many bytes the scan ptr is off from being */ |
||||
;/* dword-aligned. */ |
||||
|
||||
mov eax, edi |
||||
neg eax |
||||
and eax, 3 |
||||
mov [esp+scanalign], eax |
||||
|
||||
;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
||||
;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
||||
|
||||
mov eax, [edx].ds_w_size |
||||
sub eax, MIN_LOOKAHEAD |
||||
sub ebp, eax |
||||
jg SHORT LimitPositive |
||||
xor ebp, ebp |
||||
LimitPositive: |
||||
|
||||
;/* int best_len = s->prev_length; */ |
||||
|
||||
mov eax, [edx].ds_prev_length |
||||
mov [esp+bestlen], eax |
||||
|
||||
;/* Store the sum of s->window + best_len in esi, and locally in windowbestlen. */ |
||||
|
||||
add esi, eax |
||||
mov [esp+windowbestlen], esi |
||||
|
||||
;/* register ush scan_start = *(ushf*)scan; */ |
||||
;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
||||
;/* Posf *prev = s->prev; */ |
||||
|
||||
movzx ebx, WORD PTR[edi] |
||||
mov [esp+scanstart], ebx |
||||
movzx ebx, WORD PTR[eax+edi-1] |
||||
mov [esp+scanend], ebx |
||||
mov edi, [edx].ds_prev |
||||
|
||||
;/* Jump into the main loop. */ |
||||
|
||||
mov edx, [esp+chainlenwmask] |
||||
jmp SHORT LoopEntry |
||||
|
||||
;/* do { |
||||
; * match = s->window + cur_match; |
||||
; * if (*(ushf*)(match+best_len-1) != scan_end || |
||||
; * *(ushf*)match != scan_start) continue; |
||||
; * [...] |
||||
; * } while ((cur_match = prev[cur_match & wmask]) > limit |
||||
; * && --chain_length != 0); |
||||
; * |
||||
; * Here is the inner loop of the function. The function will spend the |
||||
; * majority of its time in this loop, and majority of that time will |
||||
; * be spent in the first ten instructions. |
||||
; * |
||||
; * Within this loop: |
||||
; * %ebx = scanend |
||||
; * %ecx = curmatch |
||||
; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) |
||||
; * %esi = windowbestlen - i.e., (window + bestlen) |
||||
; * %edi = prev |
||||
; * %ebp = limit |
||||
; */ |
||||
|
||||
ALIGN 4 |
||||
LookupLoop: |
||||
and ecx, edx |
||||
movzx ecx, WORD PTR[edi+ecx*2] |
||||
cmp ecx, ebp |
||||
jbe LeaveNow |
||||
sub edx, 000010000H |
||||
js LeaveNow |
||||
|
||||
LoopEntry: |
||||
movzx eax, WORD PTR[esi+ecx-1] |
||||
cmp eax, ebx |
||||
jnz SHORT LookupLoop |
||||
|
||||
mov eax, [esp+window] |
||||
movzx eax, WORD PTR[eax+ecx] |
||||
cmp eax, [esp+scanstart] |
||||
jnz SHORT LookupLoop |
||||
|
||||
;/* Store the current value of chainlen. */ |
||||
|
||||
mov [esp+chainlenwmask], edx |
||||
|
||||
;/* Point %edi to the string under scrutiny, and %esi to the string we */ |
||||
;/* are hoping to match it up with. In actuality, %esi and %edi are */ |
||||
;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ |
||||
;/* initialized to -(MAX_MATCH_8 - scanalign). */ |
||||
|
||||
mov esi, [esp+window] |
||||
mov edi, [esp+scan] |
||||
add esi, ecx |
||||
mov eax, [esp+scanalign] |
||||
mov edx, -MAX_MATCH_8 |
||||
lea edi, [edi+eax+MAX_MATCH_8] |
||||
lea esi, [esi+eax+MAX_MATCH_8] |
||||
|
||||
;/* Test the strings for equality, 8 bytes at a time. At the end, |
||||
; * adjust %edx so that it is offset to the exact byte that mismatched. |
||||
; * |
||||
; * We already know at this point that the first three bytes of the |
||||
; * strings match each other, and they can be safely passed over before |
||||
; * starting the compare loop. So what this code does is skip over 0-3 |
||||
; * bytes, as much as necessary in order to dword-align the %edi |
||||
; * pointer. (%esi will still be misaligned three times out of four.) |
||||
; * |
||||
; * It should be confessed that this loop usually does not represent |
||||
; * much of the total running time. Replacing it with a more |
||||
; * straightforward "rep cmpsb" would not drastically degrade |
||||
; * performance. |
||||
; */ |
||||
|
||||
LoopCmps: |
||||
mov eax, DWORD PTR[esi+edx] |
||||
xor eax, DWORD PTR[edi+edx] |
||||
jnz SHORT LeaveLoopCmps |
||||
|
||||
mov eax, DWORD PTR[esi+edx+4] |
||||
xor eax, DWORD PTR[edi+edx+4] |
||||
jnz SHORT LeaveLoopCmps4 |
||||
|
||||
add edx, 8 |
||||
jnz SHORT LoopCmps |
||||
jmp LenMaximum |
||||
ALIGN 4 |
||||
|
||||
LeaveLoopCmps4: |
||||
add edx, 4 |
||||
|
||||
LeaveLoopCmps: |
||||
test eax, 00000FFFFH |
||||
jnz SHORT LenLower |
||||
|
||||
add edx, 2 |
||||
shr eax, 16 |
||||
|
||||
LenLower: |
||||
sub al, 1 |
||||
adc edx, 0 |
||||
|
||||
;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ |
||||
;/* then automatically accept it as the best possible match and leave. */ |
||||
|
||||
lea eax, [edi+edx] |
||||
mov edi, [esp+scan] |
||||
sub eax, edi |
||||
cmp eax, MAX_MATCH |
||||
jge SHORT LenMaximum |
||||
|
||||
;/* If the length of the match is not longer than the best match we */ |
||||
;/* have so far, then forget it and return to the lookup loop. */ |
||||
|
||||
mov edx, [esp+deflatestate] |
||||
mov ebx, [esp+bestlen] |
||||
cmp eax, ebx |
||||
jg SHORT LongerMatch |
||||
mov esi, [esp+windowbestlen] |
||||
mov edi, [edx].ds_prev |
||||
mov ebx, [esp+scanend] |
||||
mov edx, [esp+chainlenwmask] |
||||
jmp LookupLoop |
||||
ALIGN 4 |
||||
|
||||
;/* s->match_start = cur_match; */ |
||||
;/* best_len = len; */ |
||||
;/* if (len >= nice_match) break; */ |
||||
;/* scan_end = *(ushf*)(scan+best_len-1); */ |
||||
|
||||
LongerMatch: |
||||
mov ebx, [esp+nicematch] |
||||
mov [esp+bestlen], eax |
||||
mov [edx].ds_match_start, ecx |
||||
cmp eax, ebx |
||||
jge SHORT LeaveNow |
||||
mov esi, [esp+window] |
||||
add esi, eax |
||||
mov [esp+windowbestlen], esi |
||||
movzx ebx, WORD PTR[edi+eax-1] |
||||
mov edi, [edx].ds_prev |
||||
mov [esp+scanend], ebx |
||||
mov edx, [esp+chainlenwmask] |
||||
jmp LookupLoop |
||||
ALIGN 4 |
||||
|
||||
;/* Accept the current string, with the maximum possible length. */ |
||||
|
||||
LenMaximum: |
||||
mov edx, [esp+deflatestate] |
||||
mov DWORD PTR[esp+bestlen], MAX_MATCH |
||||
mov [edx].ds_match_start, ecx |
||||
|
||||
;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
||||
;/* return s->lookahead; */ |
||||
|
||||
LeaveNow: |
||||
mov edx, [esp+deflatestate] |
||||
mov ebx, [esp+bestlen] |
||||
mov eax, [edx].ds_lookahead |
||||
cmp ebx, eax |
||||
jg SHORT LookaheadRet |
||||
mov eax, ebx |
||||
LookaheadRet: |
||||
|
||||
; Restore the stack and return from whence we came. |
||||
|
||||
add esp, varsize |
||||
pop ebx |
||||
pop esi |
||||
pop edi |
||||
pop ebp |
||||
ret |
||||
|
||||
_longest_match ENDP |
||||
|
||||
_TEXT ENDS |
||||
END |
@ -1,2 +1,2 @@ |
||||
ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm |
||||
ml /coff /Zi /c /Flmatch686.lst match686.asm |
||||
ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
||||
|
@ -1,972 +0,0 @@ |
||||
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 |
||||
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. |
||||
; File written by Gilles Vollant, by modifying the longest_match |
||||
; from Jean-loup Gailly in deflate.c |
||||
; |
||||
; http://www.zlib.net |
||||
; http://www.winimage.com/zLibDll |
||||
; http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||
; |
||||
; For Visual C++ 4.x and higher and ML 6.x and higher |
||||
; ml.exe is in directory \MASM611C of Win95 DDK |
||||
; ml.exe is also distributed in http://www.masm32.com/masmdl.htm |
||||
; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ |
||||
; |
||||
; this file contains two implementations of longest_match |
||||
; |
||||
; longest_match_7fff : written 1996 by Gilles Vollant optimized for |
||||
; first Pentium. Assume s->w_mask == 0x7fff |
||||
; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro |
||||
; |
||||
; to use an assembly version of longest_match, you need to define ASMV in the project |
||||
; There are two ways of using gvmat32.asm |
||||
; |
||||
; A) Suggested method |
||||
; if you want to include both longest_match_7fff and longest_match_686 |
||||
; compile the asm file running |
||||
; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm |
||||
; and include gvmat32c.c in your project |
||||
; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff, |
||||
; longest_match_7fff will be used |
||||
; if you have a more modern CPU (Pentium Pro, II and higher) |
||||
; longest_match_686 will be used |
||||
; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used, |
||||
; but this is not a situation you'll find often |
||||
; |
||||
; B) Alternative |
||||
; if you are not interested in old cpu performance and want the smallest |
||||
; binaries possible |
||||
; |
||||
; compile the asm file running |
||||
; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm |
||||
; and do not include gvmat32c.c in your project (or also define |
||||
; NOOLDPENTIUMCODE) |
||||
; |
||||
; note : as far as I know, longest_match_686 is much faster than longest_match_7fff |
||||
; on Pentium Pro/II/III, faster (but by less) on P4, but it seems |
||||
; longest_match_7fff can be (very slightly) faster on AMD Athlon64/K8 |
||||
; |
||||
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2 |
||||
|
||||
;uInt longest_match_7fff(s, cur_match) |
||||
; deflate_state *s; |
||||
; IPos cur_match; /* current match */ |
||||
|
||||
NbStack equ 76 |
||||
cur_match equ dword ptr[esp+NbStack-0] |
||||
str_s equ dword ptr[esp+NbStack-4] |
||||
; 5 dword on top (ret,ebp,esi,edi,ebx) |
||||
adrret equ dword ptr[esp+NbStack-8] |
||||
pushebp equ dword ptr[esp+NbStack-12] |
||||
pushedi equ dword ptr[esp+NbStack-16] |
||||
pushesi equ dword ptr[esp+NbStack-20] |
||||
pushebx equ dword ptr[esp+NbStack-24] |
||||
|
||||
chain_length equ dword ptr [esp+NbStack-28] |
||||
limit equ dword ptr [esp+NbStack-32] |
||||
best_len equ dword ptr [esp+NbStack-36] |
||||
window equ dword ptr [esp+NbStack-40] |
||||
prev equ dword ptr [esp+NbStack-44] |
||||
scan_start equ word ptr [esp+NbStack-48] |
||||
wmask equ dword ptr [esp+NbStack-52] |
||||
match_start_ptr equ dword ptr [esp+NbStack-56] |
||||
nice_match equ dword ptr [esp+NbStack-60] |
||||
scan equ dword ptr [esp+NbStack-64] |
||||
|
||||
windowlen equ dword ptr [esp+NbStack-68] |
||||
match_start equ dword ptr [esp+NbStack-72] |
||||
strend equ dword ptr [esp+NbStack-76] |
||||
NbStackAdd equ (NbStack-24) |
||||
|
||||
.386p |
||||
|
||||
name gvmatch |
||||
.MODEL FLAT |
||||
|
||||
|
||||
|
||||
; all the +zlib1222add offsets are due to the addition of fields |
||||
; in zlib in the deflate_state structure since the asm code was first written |
||||
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). |
||||
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). |
||||
; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). |
||||
|
||||
zlib1222add equ 8 |
||||
|
||||
; Note : these values are valid for a structure packed on an 8-byte boundary |
||||
dep_chain_length equ 74h+zlib1222add |
||||
dep_window equ 30h+zlib1222add |
||||
dep_strstart equ 64h+zlib1222add |
||||
dep_prev_length equ 70h+zlib1222add |
||||
dep_nice_match equ 88h+zlib1222add |
||||
dep_w_size equ 24h+zlib1222add |
||||
dep_prev equ 38h+zlib1222add |
||||
dep_w_mask equ 2ch+zlib1222add |
||||
dep_good_match equ 84h+zlib1222add |
||||
dep_match_start equ 68h+zlib1222add |
||||
dep_lookahead equ 6ch+zlib1222add |
||||
|
||||
|
||||
_TEXT segment |
||||
|
||||
IFDEF NOUNDERLINE |
||||
IFDEF NOOLDPENTIUMCODE |
||||
public longest_match |
||||
public match_init |
||||
ELSE |
||||
public longest_match_7fff |
||||
public cpudetect32 |
||||
public longest_match_686 |
||||
ENDIF |
||||
ELSE |
||||
IFDEF NOOLDPENTIUMCODE |
||||
public _longest_match |
||||
public _match_init |
||||
ELSE |
||||
public _longest_match_7fff |
||||
public _cpudetect32 |
||||
public _longest_match_686 |
||||
ENDIF |
||||
ENDIF |
||||
|
||||
MAX_MATCH equ 258 |
||||
MIN_MATCH equ 3 |
||||
MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) |
||||
|
||||
|
||||
|
||||
IFNDEF NOOLDPENTIUMCODE |
||||
IFDEF NOUNDERLINE |
||||
longest_match_7fff proc near |
||||
ELSE |
||||
_longest_match_7fff proc near |
||||
ENDIF |
||||
|
||||
mov edx,[esp+4] |
||||
|
||||
|
||||
|
||||
push ebp |
||||
push edi |
||||
push esi |
||||
push ebx |
||||
|
||||
sub esp,NbStackAdd |
||||
|
||||
; initialize or check the variables used in match.asm. |
||||
mov ebp,edx |
||||
|
||||
; chain_length = s->max_chain_length |
||||
; if (prev_length>=good_match) chain_length >>= 2 |
||||
mov edx,[ebp+dep_chain_length] |
||||
mov ebx,[ebp+dep_prev_length] |
||||
cmp [ebp+dep_good_match],ebx |
||||
ja noshr |
||||
shr edx,2 |
||||
noshr: |
||||
; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop |
||||
inc edx |
||||
mov edi,[ebp+dep_nice_match] |
||||
mov chain_length,edx |
||||
mov eax,[ebp+dep_lookahead] |
||||
cmp eax,edi |
||||
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
||||
jae nolookaheadnicematch |
||||
mov edi,eax |
||||
nolookaheadnicematch: |
||||
; best_len = s->prev_length |
||||
mov best_len,ebx |
||||
|
||||
; window = s->window |
||||
mov esi,[ebp+dep_window] |
||||
mov ecx,[ebp+dep_strstart] |
||||
mov window,esi |
||||
|
||||
mov nice_match,edi |
||||
; scan = window + strstart |
||||
add esi,ecx |
||||
mov scan,esi |
||||
; dx = *scan |
||||
mov dx,word ptr [esi] |
||||
; bx = *(scan+best_len-1) |
||||
mov bx,word ptr [esi+ebx-1] |
||||
add esi,MAX_MATCH-1 |
||||
; scan_start = *scan |
||||
mov scan_start,dx |
||||
; strend = scan + MAX_MATCH-1 |
||||
mov strend,esi |
||||
; bx = scan_end = *(scan+best_len-1) |
||||
|
||||
; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
||||
; s->strstart - (IPos)MAX_DIST(s) : NIL; |
||||
|
||||
mov esi,[ebp+dep_w_size] |
||||
sub esi,MIN_LOOKAHEAD |
||||
; here esi = MAX_DIST(s) |
||||
sub ecx,esi |
||||
ja nodist |
||||
xor ecx,ecx |
||||
nodist: |
||||
mov limit,ecx |
||||
|
||||
; prev = s->prev |
||||
mov edx,[ebp+dep_prev] |
||||
mov prev,edx |
||||
|
||||
; |
||||
mov edx,dword ptr [ebp+dep_match_start] |
||||
mov bp,scan_start |
||||
mov eax,cur_match |
||||
mov match_start,edx |
||||
|
||||
mov edx,window |
||||
mov edi,edx |
||||
add edi,best_len |
||||
mov esi,prev |
||||
dec edi |
||||
; windowlen = window + best_len -1 |
||||
mov windowlen,edi |
||||
|
||||
jmp beginloop2 |
||||
align 4 |
||||
|
||||
; here, in the loop |
||||
; eax = ax = cur_match |
||||
; ecx = limit |
||||
; bx = scan_end |
||||
; bp = scan_start |
||||
; edi = windowlen (window + best_len -1) |
||||
; esi = prev |
||||
|
||||
|
||||
;// here; chain_length <=16 |
||||
normalbeg0add16: |
||||
add chain_length,16 |
||||
jz exitloop |
||||
normalbeg0: |
||||
cmp word ptr[edi+eax],bx |
||||
je normalbeg2noroll |
||||
rcontlabnoroll: |
||||
; cur_match = prev[cur_match & wmask] |
||||
and eax,7fffh |
||||
mov ax,word ptr[esi+eax*2] |
||||
; if cur_match <= limit, go to exitloop |
||||
cmp ecx,eax |
||||
jnb exitloop |
||||
; if --chain_length != 0, loop again; otherwise go to exitloop |
||||
dec chain_length |
||||
jnz normalbeg0 |
||||
jmp exitloop |
||||
|
||||
normalbeg2noroll: |
||||
; if (scan_start==*(cur_match+window)) goto normalbeg2 |
||||
cmp bp,word ptr[edx+eax] |
||||
jne rcontlabnoroll |
||||
jmp normalbeg2 |
||||
|
||||
contloop3: |
||||
mov edi,windowlen |
||||
|
||||
; cur_match = prev[cur_match & wmask] |
||||
and eax,7fffh |
||||
mov ax,word ptr[esi+eax*2] |
||||
; if cur_match <= limit, go to exitloop |
||||
cmp ecx,eax |
||||
jnbexitloopshort1: |
||||
jnb exitloop |
||||
; --chain_length (and its zero test) is handled by beginloop2 / normalbeg0add16 below |
||||
|
||||
|
||||
; begin the main loop |
||||
beginloop2: |
||||
sub chain_length,16+1 |
||||
; if chain_length <=16, don't use the unrolled loop |
||||
jna normalbeg0add16 |
||||
|
||||
do16: |
||||
cmp word ptr[edi+eax],bx |
||||
je normalbeg2dc0 |
||||
|
||||
maccn MACRO lab |
||||
and eax,7fffh |
||||
mov ax,word ptr[esi+eax*2] |
||||
cmp ecx,eax |
||||
jnb exitloop |
||||
cmp word ptr[edi+eax],bx |
||||
je lab |
||||
ENDM |
||||
|
||||
rcontloop0: |
||||
maccn normalbeg2dc1 |
||||
|
||||
rcontloop1: |
||||
maccn normalbeg2dc2 |
||||
|
||||
rcontloop2: |
||||
maccn normalbeg2dc3 |
||||
|
||||
rcontloop3: |
||||
maccn normalbeg2dc4 |
||||
|
||||
rcontloop4: |
||||
maccn normalbeg2dc5 |
||||
|
||||
rcontloop5: |
||||
maccn normalbeg2dc6 |
||||
|
||||
rcontloop6: |
||||
maccn normalbeg2dc7 |
||||
|
||||
rcontloop7: |
||||
maccn normalbeg2dc8 |
||||
|
||||
rcontloop8: |
||||
maccn normalbeg2dc9 |
||||
|
||||
rcontloop9: |
||||
maccn normalbeg2dc10 |
||||
|
||||
rcontloop10: |
||||
maccn short normalbeg2dc11 |
||||
|
||||
rcontloop11: |
||||
maccn short normalbeg2dc12 |
||||
|
||||
rcontloop12: |
||||
maccn short normalbeg2dc13 |
||||
|
||||
rcontloop13: |
||||
maccn short normalbeg2dc14 |
||||
|
||||
rcontloop14: |
||||
maccn short normalbeg2dc15 |
||||
|
||||
rcontloop15: |
||||
and eax,7fffh |
||||
mov ax,word ptr[esi+eax*2] |
||||
cmp ecx,eax |
||||
jnb exitloop |
||||
|
||||
sub chain_length,16 |
||||
ja do16 |
||||
jmp normalbeg0add16 |
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
||||
|
||||
normbeg MACRO rcontlab,valsub |
||||
; if we are here, we know that *(match+best_len-1) == scan_end |
||||
cmp bp,word ptr[edx+eax] |
||||
; if (match != scan_start) goto rcontlab |
||||
jne rcontlab |
||||
; calculate the good chain_length, and we'll compare scan and match string |
||||
add chain_length,16-valsub |
||||
jmp iseq |
||||
ENDM |
||||
|
||||
|
||||
normalbeg2dc11: |
||||
normbeg rcontloop11,11 |
||||
|
||||
normalbeg2dc12: |
||||
normbeg short rcontloop12,12 |
||||
|
||||
normalbeg2dc13: |
||||
normbeg short rcontloop13,13 |
||||
|
||||
normalbeg2dc14: |
||||
normbeg short rcontloop14,14 |
||||
|
||||
normalbeg2dc15: |
||||
normbeg short rcontloop15,15 |
||||
|
||||
normalbeg2dc10: |
||||
normbeg rcontloop10,10 |
||||
|
||||
normalbeg2dc9: |
||||
normbeg rcontloop9,9 |
||||
|
||||
normalbeg2dc8: |
||||
normbeg rcontloop8,8 |
||||
|
||||
normalbeg2dc7: |
||||
normbeg rcontloop7,7 |
||||
|
||||
normalbeg2dc6: |
||||
normbeg rcontloop6,6 |
||||
|
||||
normalbeg2dc5: |
||||
normbeg rcontloop5,5 |
||||
|
||||
normalbeg2dc4: |
||||
normbeg rcontloop4,4 |
||||
|
||||
normalbeg2dc3: |
||||
normbeg rcontloop3,3 |
||||
|
||||
normalbeg2dc2: |
||||
normbeg rcontloop2,2 |
||||
|
||||
normalbeg2dc1: |
||||
normbeg rcontloop1,1 |
||||
|
||||
normalbeg2dc0: |
||||
normbeg rcontloop0,0 |
||||
|
||||
|
||||
; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end |
||||
|
||||
normalbeg2: |
||||
mov edi,window |
||||
|
||||
cmp bp,word ptr[edi+eax] |
||||
jne contloop3 ; if *(ushf*)match != scan_start, continue |
||||
|
||||
iseq: |
||||
; if we are here, we know that *(match+best_len-1) == scan_end |
||||
; and (match == scan_start) |
||||
|
||||
mov edi,edx |
||||
mov esi,scan ; esi = scan |
||||
add edi,eax ; edi = window + cur_match = match |
||||
|
||||
mov edx,[esi+3] ; compare manually dword at match+3 |
||||
xor edx,[edi+3] ; and scan +3 |
||||
|
||||
jz begincompare ; if equal, go to long compare |
||||
|
||||
; we will determine the unmatch byte and calculate len (in esi) |
||||
or dl,dl |
||||
je eq1rr |
||||
mov esi,3 |
||||
jmp trfinval |
||||
eq1rr: |
||||
or dx,dx |
||||
je eq1 |
||||
|
||||
mov esi,4 |
||||
jmp trfinval |
||||
eq1: |
||||
and edx,0ffffffh |
||||
jz eq11 |
||||
mov esi,5 |
||||
jmp trfinval |
||||
eq11: |
||||
mov esi,6 |
||||
jmp trfinval |
||||
|
||||
begincompare: |
||||
; here we know scan and match begin the same |
||||
add edi,6 |
||||
add esi,6 |
||||
mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes |
||||
repe cmpsd ; loop until mismatch |
||||
|
||||
je trfin ; go to trfin if not unmatch |
||||
; we determine the unmatch byte |
||||
sub esi,4 |
||||
mov edx,[edi-4] |
||||
xor edx,[esi] |
||||
|
||||
or dl,dl |
||||
jnz trfin |
||||
inc esi |
||||
|
||||
or dx,dx |
||||
jnz trfin |
||||
inc esi |
||||
|
||||
and edx,0ffffffh |
||||
jnz trfin |
||||
inc esi |
||||
|
||||
trfin: |
||||
sub esi,scan ; esi = len |
||||
trfinval: |
||||
; here we have finished the compare, and esi contains the length of the equal string |
||||
cmp esi,best_len ; if len > best_len, go newbestlen |
||||
ja short newbestlen |
||||
; now we restore edx, ecx and esi, for the big loop |
||||
mov esi,prev |
||||
mov ecx,limit |
||||
mov edx,window |
||||
jmp contloop3 |
||||
|
||||
newbestlen: |
||||
mov best_len,esi ; len become best_len |
||||
|
||||
mov match_start,eax ; save new position as match_start |
||||
cmp esi,nice_match ; if best_len >= nice_match, exit |
||||
jae exitloop |
||||
mov ecx,scan |
||||
mov edx,window ; restore edx=window |
||||
add ecx,esi |
||||
add esi,edx |
||||
|
||||
dec esi |
||||
mov windowlen,esi ; windowlen = window + best_len-1 |
||||
mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end |
||||
|
||||
; now we restore ecx and esi, for the big loop : |
||||
mov esi,prev |
||||
mov ecx,limit |
||||
jmp contloop3 |
||||
|
||||
exitloop: |
||||
; exit : s->match_start=match_start |
||||
mov ebx,match_start |
||||
mov ebp,str_s |
||||
mov ecx,best_len |
||||
mov dword ptr [ebp+dep_match_start],ebx |
||||
mov eax,dword ptr [ebp+dep_lookahead] |
||||
cmp ecx,eax |
||||
ja minexlo |
||||
mov eax,ecx |
||||
minexlo: |
||||
; return min(best_len,s->lookahead) |
||||
|
||||
; restore stack and register ebx,esi,edi,ebp |
||||
add esp,NbStackAdd |
||||
|
||||
pop ebx |
||||
pop esi |
||||
pop edi |
||||
pop ebp |
||||
ret |
||||
InfoAuthor: |
||||
; please don't remove this string ! |
||||
; You are free to use gvmat32 in any free or commercial app if you don't remove the string from the binary! |
||||
db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah |
||||
|
||||
|
||||
|
||||
IFDEF NOUNDERLINE |
||||
longest_match_7fff endp |
||||
ELSE |
||||
_longest_match_7fff endp |
||||
ENDIF |
||||
|
||||
|
||||
; cpudetect32 / _cpudetect32 -- classify the CPU by probing EFLAGS, then CPUID.
; The name carries a leading underscore unless NOUNDERLINE is defined.
; Reads no stack arguments. Result in eax:
;   0300h  the AC bit (40000h) of EFLAGS could not be toggled
;   0400h  the AC bit toggles, but the ID bit (200000h) could not be toggled
;   else   the value CPUID leaves in eax when executed with eax = 1
; ebx is pushed on entry and popped before returning. ecx is used as scratch
; and is not restored (CPUID itself also overwrites ebx, ecx and edx).
IFDEF NOUNDERLINE |
||||
cpudetect32 proc near |
||||
ELSE |
||||
_cpudetect32 proc near |
||||
ENDIF |
||||
|
||||
push ebx |
||||
|
||||
pushfd ; push original EFLAGS |
||||
pop eax ; get original EFLAGS |
||||
mov ecx, eax ; save original EFLAGS |
||||
xor eax, 40000h ; flip AC bit in EFLAGS |
||||
push eax ; save new EFLAGS value on stack |
||||
popfd ; replace current EFLAGS value |
||||
pushfd ; get new EFLAGS |
||||
pop eax ; store new EFLAGS in EAX |
||||
xor eax, ecx ; can't toggle AC bit, processor=80386 |
||||
jz end_cpu_is_386 ; jump if 80386 processor |
||||
push ecx |
||||
popfd ; restore AC bit in EFLAGS first |
||||
|
||||
; EFLAGS is pushed twice: the first copy stays on the stack until the
; "restore original EFLAGS" popfd below, the second is popped into ecx.
pushfd |
||||
pushfd |
||||
pop ecx |
||||
|
||||
mov eax, ecx ; get original EFLAGS |
||||
xor eax, 200000h ; flip ID bit in EFLAGS |
||||
push eax ; save new EFLAGS value on stack |
||||
popfd ; replace current EFLAGS value |
||||
pushfd ; get new EFLAGS |
||||
pop eax ; store new EFLAGS in EAX |
||||
popfd ; restore original EFLAGS |
||||
xor eax, ecx ; can't toggle ID bit, |
||||
je is_old_486 ; processor=old |
||||
|
||||
; CPUID is emitted as raw opcode bytes and executed with eax = 1; its eax
; result becomes the return value.
mov eax,1 |
||||
db 0fh,0a2h ;CPUID |
||||
|
||||
; common exit: restore the caller's ebx and return with the result in eax
exitcpudetect: |
||||
pop ebx |
||||
ret |
||||
|
||||
end_cpu_is_386: |
||||
mov eax,0300h |
||||
jmp exitcpudetect |
||||
|
||||
is_old_486: |
||||
mov eax,0400h |
||||
jmp exitcpudetect |
||||
|
||||
IFDEF NOUNDERLINE |
||||
cpudetect32 endp |
||||
ELSE |
||||
_cpudetect32 endp |
||||
ENDIF |
||||
ENDIF |
||||
|
||||
; Match-length constants for the code that follows.
; MIN_LOOKAHEAD evaluates to 258 + 3 + 1 = 262.
MAX_MATCH equ 258 |
||||
MIN_MATCH equ 3 |
||||
MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) |
||||
; (258 + 7) AND 0FFF0h evaluates to 256: the mask clears the low four bits,
; so the result is a multiple of 16 rounded down from 265.
; NOTE(review): the name suggests MAX_MATCH rounded up to a multiple of 8
; (264, i.e. a mask of 0FFF8h) - confirm which value the users of this
; constant expect before relying on it.
MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) |
||||
|
||||
|
||||
;;; stack frame offsets |
||||
|
||||
chainlenwmask equ esp + 0 ; high word: current chain len |
||||
; low word: s->wmask |
||||
window equ esp + 4 ; local copy of s->window |
||||
windowbestlen equ esp + 8 ; s->window + bestlen |
||||
scanstart equ esp + 16 ; first two bytes of string |
||||
scanend equ esp + 12 ; last two bytes of string |
||||
scanalign equ esp + 20 ; dword-misalignment of string |
||||
nicematch equ esp + 24 ; a good enough match size |
||||
bestlen equ esp + 28 ; size of best match so far |
||||
scan equ esp + 32 ; ptr to string wanting match |
||||
|
||||
LocalVarsSize equ 36 |
||||
; saved ebx byte esp + 36 |
||||
; saved edi byte esp + 40 |
||||
; saved esi byte esp + 44 |
||||
; saved ebp byte esp + 48 |
||||
; return address byte esp + 52 |
||||
deflatestate equ esp + 56 ; the function arguments |
||||
curmatch equ esp + 60 |
||||
|
||||
;;; Offsets for fields in the deflate_state structure. These numbers |
||||
;;; are calculated from the definition of deflate_state, with the |
||||
;;; assumption that the compiler will dword-align the fields. (Thus, |
||||
;;; changing the definition of deflate_state could easily cause this |
||||
;;; program to crash horribly, without so much as a warning at |
||||
;;; compile time. Sigh.) |
||||
|
||||
dsWSize equ 36+zlib1222add |
||||
dsWMask equ 44+zlib1222add |
||||
dsWindow equ 48+zlib1222add |
||||
dsPrev equ 56+zlib1222add |
||||
dsMatchLen equ 88+zlib1222add |
||||
dsPrevMatch equ 92+zlib1222add |
||||
dsStrStart equ 100+zlib1222add |
||||
dsMatchStart equ 104+zlib1222add |
||||
dsLookahead equ 108+zlib1222add |
||||
dsPrevLen equ 112+zlib1222add |
||||
dsMaxChainLen equ 116+zlib1222add |
||||
dsGoodMatch equ 132+zlib1222add |
||||
dsNiceMatch equ 136+zlib1222add |
||||
|
||||
|
||||
;;; match.asm -- Pentium-Pro-optimized version of longest_match() |
||||
;;; Written for zlib 1.1.2 |
||||
;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
||||
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||
;;; |
||||
;;; This is free software; you can redistribute it and/or modify it |
||||
;;; under the terms of the GNU General Public License. |
||||
|
||||
;GLOBAL _longest_match, _match_init |
||||
|
||||
|
||||
;SECTION .text |
||||
|
||||
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
||||
|
||||
;_longest_match: |
||||
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
;;;
;;; Assembled as longest_match when NOOLDPENTIUMCODE is defined (in which
;;; case an empty match_init is provided as well), otherwise as
;;; longest_match_686.  NOUNDERLINE selects the undecorated symbol names.
;;;
;;; In:     two dword arguments on the stack (deflatestate, curmatch)
;;; Out:    eax = min(best_len, s->lookahead); s->match_start is updated
;;;         whenever a longer match is accepted
;;; Saved:  ebx, esi, edi, ebp (pushed on entry, popped before ret)
;;; Uses:   ecx, edx and the flags as scratch
;;;
;;; Comparisons that the reference C code performs on uInt values use the
;;; unsigned condition codes (jb/ja); comparisons on the C "int len" keep
;;; the signed ones.

;;; MAX_MATCH rounded up to a multiple of 8 (264 = 0108h).  "=" is used so
;;; that the symbol may be assigned again elsewhere without an error.
MaxMatch8       =       ((MAX_MATCH + 7) AND 0FFF8h)

IFDEF NOOLDPENTIUMCODE
    IFDEF NOUNDERLINE
    longest_match       proc near
    ELSE
    _longest_match      proc near
    ENDIF
ELSE
    IFDEF NOUNDERLINE
    longest_match_686   proc near
    ELSE
    _longest_match_686  proc near
    ENDIF
ENDIF

;;; Save the registers the caller may be using and make room for the
;;; local stack frame.

        push    ebp
        push    edi
        push    esi
        push    ebx
        sub     esp, LocalVarsSize

;;; ecx holds cur_match for the whole function.  edx points to the
;;; deflate_state structure during the set-up only.

        mov     edx, [deflatestate]
        mov     ecx, [curmatch]

;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) chain_length >>= 2;
;;; (the two mov instructions after the cmp leave the flags untouched)

        mov     eax, [edx + dsPrevLen]
        mov     ebx, [edx + dsGoodMatch]
        cmp     eax, ebx
        mov     eax, [edx + dsWMask]
        mov     ebx, [edx + dsMaxChainLen]
        jb      LastMatchGood                   ; unsigned: prev_length < good_match
        shr     ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the loop can test the
;;; sign flag instead of the zero flag.  It is then moved to the high
;;; word, with wmask in the low word.

        dec     ebx
        shl     ebx, 16
        or      ebx, eax
        mov     [chainlenwmask], ebx

;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

        mov     eax, [edx + dsNiceMatch]
        mov     ebx, [edx + dsLookahead]
        cmp     ebx, eax
        jb      LookaheadLess                   ; unsigned: lookahead < nice_match
        mov     ebx, eax
LookaheadLess:
        mov     [nicematch], ebx

;;; register Bytef *scan = s->window + s->strstart;

        mov     esi, [edx + dsWindow]
        mov     [window], esi
        mov     ebp, [edx + dsStrStart]
        lea     edi, [esi + ebp]
        mov     [scan], edi

;;; scanalign = number of bytes (0-3) from scan up to the next dword
;;; boundary.

        mov     eax, edi
        neg     eax
        and     eax, 3
        mov     [scanalign], eax

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
;;; "ja" after the sub means no borrow and a non-zero result, i.e. the
;;; unsigned strstart > MAX_DIST test of the C code.

        mov     eax, [edx + dsWSize]
        sub     eax, MIN_LOOKAHEAD
        sub     ebp, eax
        ja      LimitPositive
        xor     ebp, ebp
LimitPositive:

;;; int best_len = s->prev_length;

        mov     eax, [edx + dsPrevLen]
        mov     [bestlen], eax

;;; Keep s->window + best_len both in the windowbestlen slot and in esi.

        add     esi, eax
        mov     [windowbestlen], esi

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx   ebx, word ptr [edi]
        mov     [scanstart], ebx
        movzx   ebx, word ptr [edi + eax - 1]
        mov     [scanend], ebx
        mov     edi, [edx + dsPrev]

;;; Enter the main loop at the point where the first candidate is tested.

        mov     edx, [chainlenwmask]
        jmp     short LoopEntry

        align   4

;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Within this loop:
;;;     ebx = scanend
;;;     ecx = curmatch
;;;     edx = chainlenwmask, i.e. ((chainlen << 16) | wmask)
;;;     esi = windowbestlen, i.e. (window + bestlen)
;;;     edi = prev
;;;     ebp = limit

LookupLoop:
        and     ecx, edx
        movzx   ecx, word ptr [edi + ecx*2]
        cmp     ecx, ebp
        jbe     LeaveNow
        sub     edx, 00010000h                  ; --chainlen (kept in the high word)
        js      LeaveNow
LoopEntry:
        movzx   eax, word ptr [esi + ecx - 1]
        cmp     eax, ebx
        jnz     LookupLoop
        mov     eax, [window]
        movzx   eax, word ptr [eax + ecx]
        cmp     eax, [scanstart]
        jnz     LookupLoop

;;; Both words matched: save the current chainlen before edx is reused.

        mov     [chainlenwmask], edx

;;; Point edi at the string being scanned and esi at the candidate.  Both
;;; pointers are advanced by (MaxMatch8 + scanalign) and edx starts at
;;; -MaxMatch8, so the first dword compared is at offset scanalign in each
;;; string and edx reaches zero once MaxMatch8 bytes have been compared.

        mov     esi, [window]
        mov     edi, [scan]
        add     esi, ecx
        mov     eax, [scanalign]
        mov     edx, -MaxMatch8
        lea     edi, [edi + eax + MaxMatch8]
        lea     esi, [esi + eax + MaxMatch8]

;;; Compare the strings 8 bytes per iteration.  Skipping the first
;;; scanalign (0-3) bytes relies on the original author's note that the
;;; first three bytes are already known to match here; it dword-aligns
;;; the edi side (esi may remain misaligned).

LoopCmps:
        mov     eax, [esi + edx]
        xor     eax, [edi + edx]
        jnz     LeaveLoopCmps
        mov     eax, [esi + edx + 4]
        xor     eax, [edi + edx + 4]
        jnz     LeaveLoopCmps4
        add     edx, 8
        jnz     LoopCmps
        jmp     short LenMaximum

;;; eax holds the xor of the differing dwords.  Advance edx to the exact
;;; byte that mismatched: +2 if the low word is equal, then +1 more if
;;; the low byte of the remaining word is equal (sub al,1 borrows only
;;; when al is zero).

LeaveLoopCmps4:
        add     edx, 4
LeaveLoopCmps:
        test    eax, 0000FFFFh
        jnz     LenLower
        add     edx, 2
        shr     eax, 16
LenLower:
        sub     al, 1
        adc     edx, 0

;;; len = (address of the mismatch) - scan.  A length of at least
;;; MAX_MATCH is accepted at once as the best possible match.

        lea     eax, [edi + edx]
        mov     edi, [scan]
        sub     eax, edi
        cmp     eax, MAX_MATCH
        jge     LenMaximum

;;; If the match is not longer than the best one so far, restore the loop
;;; registers and go back to the lookup loop.

        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        cmp     eax, ebx
        jg      LongerMatch
        mov     esi, [windowbestlen]
        mov     edi, [edx + dsPrev]
        mov     ebx, [scanend]
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;;     s->match_start = cur_match;
;;;     best_len = len;
;;;     if (len >= nice_match) break;
;;;     scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov     ebx, [nicematch]
        mov     [bestlen], eax
        mov     [edx + dsMatchStart], ecx
        cmp     eax, ebx
        jge     LeaveNow
        mov     esi, [window]
        add     esi, eax
        mov     [windowbestlen], esi
        movzx   ebx, word ptr [edi + eax - 1]
        mov     edi, [edx + dsPrev]
        mov     [scanend], ebx
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov     edx, [deflatestate]
        mov     dword ptr [bestlen], MAX_MATCH
        mov     [edx + dsMatchStart], ecx

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        mov     eax, [edx + dsLookahead]
        cmp     ebx, eax
        ja      LookaheadRet                    ; unsigned: best_len > lookahead
        mov     eax, ebx
LookaheadRet:

;;; Release the stack frame, restore the saved registers and return.

        add     esp, LocalVarsSize
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp

        ret
; please don't remove this string !
; You can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
        db      0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah

IFDEF NOOLDPENTIUMCODE
    IFDEF NOUNDERLINE
    longest_match       endp
    ELSE
    _longest_match      endp
    ENDIF

;;; match_init: nothing to initialise in this configuration, so it just
;;; returns.
    IFDEF NOUNDERLINE
    match_init          proc near
                        ret
    match_init          endp
    ELSE
    _match_init         proc near
                        ret
    _match_init         endp
    ENDIF
ELSE
    IFDEF NOUNDERLINE
    longest_match_686   endp
    ELSE
    _longest_match_686  endp
    ENDIF
ENDIF
||||
|
||||
_TEXT ends |
||||
end |
@ -1,62 +0,0 @@ |
||||
/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86
 * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
 * File written by Gilles Vollant, by modifying the longest_match
 * from Jean-loup Gailly in deflate.c
 * It selects which assembly longest_match routine to call:
 * longest_match_7fff is used only when wmask == 0x7fff and the CPU is
 * older than the Pentium Pro; in every other case longest_match_686 is called
 * (the longest_match_7fff assembly code is faster with a fixed wmask)
 *
 * Read the comment at the beginning of gvmat32.asm for more information
 */
||||
|
||||
#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) |
||||
#include "deflate.h" |
||||
|
||||
/* if your C compiler doesn't add an underline before function names,
   define ADD_UNDERLINE_ASMFUNC */
||||
#ifdef ADD_UNDERLINE_ASMFUNC |
||||
#define longest_match_7fff _longest_match_7fff |
||||
#define longest_match_686 _longest_match_686 |
||||
#define cpudetect32 _cpudetect32 |
||||
#endif |
||||
|
||||
|
||||
unsigned long cpudetect32(); |
||||
|
||||
uInt longest_match_c( |
||||
deflate_state *s, |
||||
IPos cur_match); /* current match */ |
||||
|
||||
|
||||
uInt longest_match_7fff( |
||||
deflate_state *s, |
||||
IPos cur_match); /* current match */ |
||||
|
||||
uInt longest_match_686( |
||||
deflate_state *s, |
||||
IPos cur_match); /* current match */ |
||||
|
||||
|
||||
static uInt iIsPPro=2; |
||||
|
||||
void match_init () |
||||
{ |
||||
iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0; |
||||
} |
||||
|
||||
uInt longest_match( |
||||
deflate_state *s, |
||||
IPos cur_match) /* current match */ |
||||
{ |
||||
if (iIsPPro!=0) |
||||
return longest_match_686(s,cur_match); |
||||
|
||||
if (s->w_mask != 0x7fff) |
||||
return longest_match_686(s,cur_match); |
||||
|
||||
/* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */ |
||||
return longest_match_7fff(s,cur_match); |
||||
} |
||||
|
||||
|
||||
#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */ |
@ -0,0 +1,478 @@ |
||||
; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86 |
||||
; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant. |
||||
; File written by Gilles Vollant, by converting match686.S from Brian Raiter |
||||
; for MASM. This is as assembly version of longest_match |
||||
; from Jean-loup Gailly in deflate.c |
||||
; |
||||
; http://www.zlib.net |
||||
; http://www.winimage.com/zLibDll |
||||
; http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||
; |
||||
; For Visual C++ 4.x and higher and ML 6.x and higher |
||||
; ml.exe is distributed in |
||||
; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 |
||||
; |
||||
; this file contains one implementation of longest_match
;
; this longest_match was written by Brian Raiter (1998), optimized for Pentium Pro
; (and is the fastest known version of longest_match on modern Core 2 Duo and AMD Phenom)
||||
; |
||||
; for using an assembly version of longest_match, you need define ASMV in project |
||||
; |
||||
; compile the asm file by running
;           ml /coff /Zi /c /Flmatch686.lst match686.asm
; and include match686.obj in your project
||||
; |
||||
; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
; Pentium (prior to Pentium Pro) and this version for Pentium Pro and modern processors,
; with autoselect (with cpu detection code)
; if you want to support the old Pentium optimization, you can still use that version
;
; this file is not optimized for the old Pentium, but it is compatible with all 32-bit x86
; processors (starting with the 80386)
||||
; |
||||
; |
||||
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
||||
|
||||
;uInt longest_match(s, cur_match) |
||||
; deflate_state *s; |
||||
; IPos cur_match; /* current match */ |
||||
|
||||
NbStack equ 76 |
||||
cur_match equ dword ptr[esp+NbStack-0] |
||||
str_s equ dword ptr[esp+NbStack-4] |
||||
; 5 dword on top (ret,ebp,esi,edi,ebx) |
||||
adrret equ dword ptr[esp+NbStack-8] |
||||
pushebp equ dword ptr[esp+NbStack-12] |
||||
pushedi equ dword ptr[esp+NbStack-16] |
||||
pushesi equ dword ptr[esp+NbStack-20] |
||||
pushebx equ dword ptr[esp+NbStack-24] |
||||
|
||||
chain_length equ dword ptr [esp+NbStack-28] |
||||
limit equ dword ptr [esp+NbStack-32] |
||||
best_len equ dword ptr [esp+NbStack-36] |
||||
window equ dword ptr [esp+NbStack-40] |
||||
prev equ dword ptr [esp+NbStack-44] |
||||
scan_start equ word ptr [esp+NbStack-48] |
||||
wmask equ dword ptr [esp+NbStack-52] |
||||
match_start_ptr equ dword ptr [esp+NbStack-56] |
||||
nice_match equ dword ptr [esp+NbStack-60] |
||||
scan equ dword ptr [esp+NbStack-64] |
||||
|
||||
windowlen equ dword ptr [esp+NbStack-68] |
||||
match_start equ dword ptr [esp+NbStack-72] |
||||
strend equ dword ptr [esp+NbStack-76] |
||||
NbStackAdd equ (NbStack-24) |
||||
|
||||
.386p |
||||
|
||||
name gvmatch |
||||
.MODEL FLAT |
||||
|
||||
|
||||
|
||||
; all the +zlib1222add offsets are due to the addition of fields
; in zlib in the deflate_state structure since the asm code was first written
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
||||
|
||||
zlib1222add equ 8 |
||||
|
||||
; Note : these values are good with an 8-byte boundary packed structure
||||
dep_chain_length equ 74h+zlib1222add |
||||
dep_window equ 30h+zlib1222add |
||||
dep_strstart equ 64h+zlib1222add |
||||
dep_prev_length equ 70h+zlib1222add |
||||
dep_nice_match equ 88h+zlib1222add |
||||
dep_w_size equ 24h+zlib1222add |
||||
dep_prev equ 38h+zlib1222add |
||||
dep_w_mask equ 2ch+zlib1222add |
||||
dep_good_match equ 84h+zlib1222add |
||||
dep_match_start equ 68h+zlib1222add |
||||
dep_lookahead equ 6ch+zlib1222add |
||||
|
||||
|
||||
_TEXT segment |
||||
|
||||
IFDEF NOUNDERLINE |
||||
public longest_match |
||||
public match_init |
||||
ELSE |
||||
public _longest_match |
||||
public _match_init |
||||
ENDIF |
||||
|
||||
;;; Match-length limits used by longest_match.  (These three constants
;;; were previously defined twice in a row with identical values.)

MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH + MIN_MATCH + 1)

;;; MAX_MATCH rounded up to a multiple of 8.  The compare loop hard-codes
;;; this value as 0108h (264), so the mask has to clear only the low three
;;; bits; the previous mask (0FFF0h) produced 256, which disagreed with
;;; the code.
MAX_MATCH_8_    equ     ((MAX_MATCH + 7) AND 0FFF8h)
||||
|
||||
|
||||
;;; stack frame offsets |
||||
|
||||
chainlenwmask equ esp + 0 ; high word: current chain len |
||||
; low word: s->wmask |
||||
window equ esp + 4 ; local copy of s->window |
||||
windowbestlen equ esp + 8 ; s->window + bestlen |
||||
scanstart equ esp + 16 ; first two bytes of string |
||||
scanend equ esp + 12 ; last two bytes of string |
||||
scanalign equ esp + 20 ; dword-misalignment of string |
||||
nicematch equ esp + 24 ; a good enough match size |
||||
bestlen equ esp + 28 ; size of best match so far |
||||
scan equ esp + 32 ; ptr to string wanting match |
||||
|
||||
LocalVarsSize equ 36 |
||||
; saved ebx        at esp + 36
; saved esi        at esp + 40
; saved edi        at esp + 44
; saved ebp        at esp + 48
; return address   at esp + 52
||||
deflatestate equ esp + 56 ; the function arguments |
||||
curmatch equ esp + 60 |
||||
|
||||
;;; Offsets for fields in the deflate_state structure. These numbers |
||||
;;; are calculated from the definition of deflate_state, with the |
||||
;;; assumption that the compiler will dword-align the fields. (Thus, |
||||
;;; changing the definition of deflate_state could easily cause this |
||||
;;; program to crash horribly, without so much as a warning at |
||||
;;; compile time. Sigh.) |
||||
|
||||
dsWSize equ 36+zlib1222add |
||||
dsWMask equ 44+zlib1222add |
||||
dsWindow equ 48+zlib1222add |
||||
dsPrev equ 56+zlib1222add |
||||
dsMatchLen equ 88+zlib1222add |
||||
dsPrevMatch equ 92+zlib1222add |
||||
dsStrStart equ 100+zlib1222add |
||||
dsMatchStart equ 104+zlib1222add |
||||
dsLookahead equ 108+zlib1222add |
||||
dsPrevLen equ 112+zlib1222add |
||||
dsMaxChainLen equ 116+zlib1222add |
||||
dsGoodMatch equ 132+zlib1222add |
||||
dsNiceMatch equ 136+zlib1222add |
||||
|
||||
|
||||
;;; match686.asm -- Pentium-Pro-optimized version of longest_match() |
||||
;;; Written for zlib 1.1.2 |
||||
;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
||||
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html |
||||
;;; |
||||
;; |
||||
;; This software is provided 'as-is', without any express or implied |
||||
;; warranty. In no event will the authors be held liable for any damages |
||||
;; arising from the use of this software. |
||||
;; |
||||
;; Permission is granted to anyone to use this software for any purpose, |
||||
;; including commercial applications, and to alter it and redistribute it |
||||
;; freely, subject to the following restrictions: |
||||
;; |
||||
;; 1. The origin of this software must not be misrepresented; you must not |
||||
;; claim that you wrote the original software. If you use this software |
||||
;; in a product, an acknowledgment in the product documentation would be |
||||
;; appreciated but is not required. |
||||
;; 2. Altered source versions must be plainly marked as such, and must not be |
||||
;; misrepresented as being the original software |
||||
;; 3. This notice may not be removed or altered from any source distribution. |
||||
;; |
||||
|
||||
;GLOBAL _longest_match, _match_init |
||||
|
||||
|
||||
;SECTION .text |
||||
|
||||
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
||||
|
||||
;_longest_match: |
||||
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
;;;
;;; In:     two dword arguments on the stack (deflatestate, curmatch)
;;; Out:    eax = min(best_len, s->lookahead); s->match_start is updated
;;;         whenever a longer match is accepted
;;; Saved:  ebx, esi, edi, ebp (pushed on entry, popped before ret)
;;; Uses:   ecx, edx and the flags as scratch
;;;
;;; Comparisons that the reference C code performs on uInt values use the
;;; unsigned condition codes (jb/ja); comparisons on the C "int len" keep
;;; the signed ones.

;;; MAX_MATCH rounded up to a multiple of 8 (264 = 0108h).  "=" is used so
;;; that the symbol may be assigned again elsewhere without an error.
MaxMatch8       =       ((MAX_MATCH + 7) AND 0FFF8h)

IFDEF NOUNDERLINE
longest_match   proc near
ELSE
_longest_match  proc near
ENDIF

;;; Save the registers the caller may be using and make room for the
;;; local stack frame.

        push    ebp
        push    edi
        push    esi
        push    ebx
        sub     esp, LocalVarsSize

;;; ecx holds cur_match for the whole function.  edx points to the
;;; deflate_state structure during the set-up only.

        mov     edx, [deflatestate]
        mov     ecx, [curmatch]

;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) chain_length >>= 2;
;;; (the two mov instructions after the cmp leave the flags untouched)

        mov     eax, [edx + dsPrevLen]
        mov     ebx, [edx + dsGoodMatch]
        cmp     eax, ebx
        mov     eax, [edx + dsWMask]
        mov     ebx, [edx + dsMaxChainLen]
        jb      LastMatchGood                   ; unsigned: prev_length < good_match
        shr     ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the loop can test the
;;; sign flag instead of the zero flag.  It is then moved to the high
;;; word, with wmask in the low word.

        dec     ebx
        shl     ebx, 16
        or      ebx, eax
        mov     [chainlenwmask], ebx

;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

        mov     eax, [edx + dsNiceMatch]
        mov     ebx, [edx + dsLookahead]
        cmp     ebx, eax
        jb      LookaheadLess                   ; unsigned: lookahead < nice_match
        mov     ebx, eax
LookaheadLess:
        mov     [nicematch], ebx

;;; register Bytef *scan = s->window + s->strstart;

        mov     esi, [edx + dsWindow]
        mov     [window], esi
        mov     ebp, [edx + dsStrStart]
        lea     edi, [esi + ebp]
        mov     [scan], edi

;;; scanalign = number of bytes (0-3) from scan up to the next dword
;;; boundary.

        mov     eax, edi
        neg     eax
        and     eax, 3
        mov     [scanalign], eax

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
;;; "ja" after the sub means no borrow and a non-zero result, i.e. the
;;; unsigned strstart > MAX_DIST test of the C code.

        mov     eax, [edx + dsWSize]
        sub     eax, MIN_LOOKAHEAD
        sub     ebp, eax
        ja      LimitPositive
        xor     ebp, ebp
LimitPositive:

;;; int best_len = s->prev_length;

        mov     eax, [edx + dsPrevLen]
        mov     [bestlen], eax

;;; Keep s->window + best_len both in the windowbestlen slot and in esi.

        add     esi, eax
        mov     [windowbestlen], esi

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx   ebx, word ptr [edi]
        mov     [scanstart], ebx
        movzx   ebx, word ptr [edi + eax - 1]
        mov     [scanend], ebx
        mov     edi, [edx + dsPrev]

;;; Enter the main loop at the point where the first candidate is tested.

        mov     edx, [chainlenwmask]
        jmp     short LoopEntry

        align   4

;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Within this loop:
;;;     ebx = scanend
;;;     ecx = curmatch
;;;     edx = chainlenwmask, i.e. ((chainlen << 16) | wmask)
;;;     esi = windowbestlen, i.e. (window + bestlen)
;;;     edi = prev
;;;     ebp = limit

LookupLoop:
        and     ecx, edx
        movzx   ecx, word ptr [edi + ecx*2]
        cmp     ecx, ebp
        jbe     LeaveNow
        sub     edx, 00010000h                  ; --chainlen (kept in the high word)
        js      LeaveNow
LoopEntry:
        movzx   eax, word ptr [esi + ecx - 1]
        cmp     eax, ebx
        jnz     LookupLoop
        mov     eax, [window]
        movzx   eax, word ptr [eax + ecx]
        cmp     eax, [scanstart]
        jnz     LookupLoop

;;; Both words matched: save the current chainlen before edx is reused.

        mov     [chainlenwmask], edx

;;; Point edi at the string being scanned and esi at the candidate.  Both
;;; pointers are advanced by (MaxMatch8 + scanalign) and edx starts at
;;; -MaxMatch8, so the first dword compared is at offset scanalign in each
;;; string and edx reaches zero once MaxMatch8 bytes have been compared.

        mov     esi, [window]
        mov     edi, [scan]
        add     esi, ecx
        mov     eax, [scanalign]
        mov     edx, -MaxMatch8
        lea     edi, [edi + eax + MaxMatch8]
        lea     esi, [esi + eax + MaxMatch8]

;;; Compare the strings 8 bytes per iteration.  Skipping the first
;;; scanalign (0-3) bytes relies on the original author's note that the
;;; first three bytes are already known to match here; it dword-aligns
;;; the edi side (esi may remain misaligned).

LoopCmps:
        mov     eax, [esi + edx]
        xor     eax, [edi + edx]
        jnz     LeaveLoopCmps
        mov     eax, [esi + edx + 4]
        xor     eax, [edi + edx + 4]
        jnz     LeaveLoopCmps4
        add     edx, 8
        jnz     LoopCmps
        jmp     short LenMaximum

;;; eax holds the xor of the differing dwords.  Advance edx to the exact
;;; byte that mismatched: +2 if the low word is equal, then +1 more if
;;; the low byte of the remaining word is equal (sub al,1 borrows only
;;; when al is zero).

LeaveLoopCmps4:
        add     edx, 4
LeaveLoopCmps:
        test    eax, 0000FFFFh
        jnz     LenLower
        add     edx, 2
        shr     eax, 16
LenLower:
        sub     al, 1
        adc     edx, 0

;;; len = (address of the mismatch) - scan.  A length of at least
;;; MAX_MATCH is accepted at once as the best possible match.

        lea     eax, [edi + edx]
        mov     edi, [scan]
        sub     eax, edi
        cmp     eax, MAX_MATCH
        jge     LenMaximum

;;; If the match is not longer than the best one so far, restore the loop
;;; registers and go back to the lookup loop.

        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        cmp     eax, ebx
        jg      LongerMatch
        mov     esi, [windowbestlen]
        mov     edi, [edx + dsPrev]
        mov     ebx, [scanend]
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;;     s->match_start = cur_match;
;;;     best_len = len;
;;;     if (len >= nice_match) break;
;;;     scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov     ebx, [nicematch]
        mov     [bestlen], eax
        mov     [edx + dsMatchStart], ecx
        cmp     eax, ebx
        jge     LeaveNow
        mov     esi, [window]
        add     esi, eax
        mov     [windowbestlen], esi
        movzx   ebx, word ptr [edi + eax - 1]
        mov     edi, [edx + dsPrev]
        mov     [scanend], ebx
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov     edx, [deflatestate]
        mov     dword ptr [bestlen], MAX_MATCH
        mov     [edx + dsMatchStart], ecx

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        mov     eax, [edx + dsLookahead]
        cmp     ebx, eax
        ja      LookaheadRet                    ; unsigned: best_len > lookahead
        mov     eax, ebx
LookaheadRet:

;;; Release the stack frame, restore the saved registers and return.

        add     esp, LocalVarsSize
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp

        ret
; please don't remove this string !
; You can freely use match686 in any free or commercial app if you don't remove the string in the binary!
        db      0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah

IFDEF NOUNDERLINE
longest_match   endp
ELSE
_longest_match  endp
ENDIF
||||
|
||||
;;; match_init
;;; Does nothing in this implementation: the procedure consists of a
;;; single ret.  The public name carries a leading underscore unless
;;; NOUNDERLINE is defined.

IFNDEF NOUNDERLINE
_match_init     proc near
                ret
_match_init     endp
ELSE
match_init      proc near
                ret
match_init      endp
ENDIF
||||
|
||||
|
||||
_TEXT ends |
||||
end |
Binary file not shown.
@ -1,3 +0,0 @@ |
||||
cl /DASMV /I..\.. /O2 /c gvmat32c.c |
||||
ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm |
||||
ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
@ -1,114 +0,0 @@ |
||||
|
||||
VERSION 1.23 |
||||
|
||||
HEAPSIZE 1048576,8192 |
||||
|
||||
EXPORTS |
||||
adler32 @1 |
||||
compress @2 |
||||
crc32 @3 |
||||
deflate @4 |
||||
deflateCopy @5 |
||||
deflateEnd @6 |
||||
deflateInit2_ @7 |
||||
deflateInit_ @8 |
||||
deflateParams @9 |
||||
deflateReset @10 |
||||
deflateSetDictionary @11 |
||||
gzclose @12 |
||||
gzdopen @13 |
||||
gzerror @14 |
||||
gzflush @15 |
||||
gzopen @16 |
||||
gzread @17 |
||||
gzwrite @18 |
||||
inflate @19 |
||||
inflateEnd @20 |
||||
inflateInit2_ @21 |
||||
inflateInit_ @22 |
||||
inflateReset @23 |
||||
inflateSetDictionary @24 |
||||
inflateSync @25 |
||||
uncompress @26 |
||||
zlibVersion @27 |
||||
gzprintf @28 |
||||
gzputc @29 |
||||
gzgetc @30 |
||||
gzseek @31 |
||||
gzrewind @32 |
||||
gztell @33 |
||||
gzeof @34 |
||||
gzsetparams @35 |
||||
zError @36 |
||||
inflateSyncPoint @37 |
||||
get_crc_table @38 |
||||
compress2 @39 |
||||
gzputs @40 |
||||
gzgets @41 |
||||
inflateCopy @42 |
||||
inflateBackInit_ @43 |
||||
inflateBack @44 |
||||
inflateBackEnd @45 |
||||
compressBound @46 |
||||
deflateBound @47 |
||||
gzclearerr @48 |
||||
gzungetc @49 |
||||
zlibCompileFlags @50 |
||||
deflatePrime @51 |
||||
|
||||
unzOpen @61 |
||||
unzClose @62 |
||||
unzGetGlobalInfo @63 |
||||
unzGetCurrentFileInfo @64 |
||||
unzGoToFirstFile @65 |
||||
unzGoToNextFile @66 |
||||
unzOpenCurrentFile @67 |
||||
unzReadCurrentFile @68 |
||||
unzOpenCurrentFile3 @69 |
||||
unztell @70 |
||||
unzeof @71 |
||||
unzCloseCurrentFile @72 |
||||
unzGetGlobalComment @73 |
||||
unzStringFileNameCompare @74 |
||||
unzLocateFile @75 |
||||
unzGetLocalExtrafield @76 |
||||
unzOpen2 @77 |
||||
unzOpenCurrentFile2 @78 |
||||
unzOpenCurrentFilePassword @79 |
||||
|
||||
zipOpen @80 |
||||
zipOpenNewFileInZip @81 |
||||
zipWriteInFileInZip @82 |
||||
zipCloseFileInZip @83 |
||||
zipClose @84 |
||||
zipOpenNewFileInZip2 @86 |
||||
zipCloseFileInZipRaw @87 |
||||
zipOpen2 @88 |
||||
zipOpenNewFileInZip3 @89 |
||||
|
||||
unzGetFilePos @100 |
||||
unzGoToFilePos @101 |
||||
|
||||
fill_win32_filefunc @110 |
||||
fill_win32_filefunc64 @111 |
||||
fill_win32_filefunc64A @112 |
||||
fill_win32_filefunc64W @113 |
||||
|
||||
; quick hack by hkuno@microhouse.co.jp |
||||
unzOpen64 @120 |
||||
unzOpen2_64 @121 |
||||
unzGetGlobalInfo64 @122 |
||||
unzGetCurrentFileInfo64 @124 |
||||
unzGetCurrentFileZStreamPos64 @125 |
||||
unztell64 @126 |
||||
unzGetFilePos64 @127 |
||||
unzGoToFilePos64 @128 |
||||
|
||||
zipOpen64 @130 |
||||
zipOpen2_64 @131 |
||||
zipOpenNewFileInZip64 @132 |
||||
zipOpenNewFileInZip2_64 @133 |
||||
zipOpenNewFileInZip3_64 @134 |
||||
zipOpenNewFileInZip4_64 @135 |
||||
zipCloseFileInZipRaw64 @136 |
||||
; end hack |
@ -1,114 +0,0 @@ |
||||
|
||||
VERSION 1.23 |
||||
|
||||
HEAPSIZE 1048576,8192 |
||||
|
||||
EXPORTS |
||||
adler32 @1 |
||||
compress @2 |
||||
crc32 @3 |
||||
deflate @4 |
||||
deflateCopy @5 |
||||
deflateEnd @6 |
||||
deflateInit2_ @7 |
||||
deflateInit_ @8 |
||||
deflateParams @9 |
||||
deflateReset @10 |
||||
deflateSetDictionary @11 |
||||
gzclose @12 |
||||
gzdopen @13 |
||||
gzerror @14 |
||||
gzflush @15 |
||||
gzopen @16 |
||||
gzread @17 |
||||
gzwrite @18 |
||||
inflate @19 |
||||
inflateEnd @20 |
||||
inflateInit2_ @21 |
||||
inflateInit_ @22 |
||||
inflateReset @23 |
||||
inflateSetDictionary @24 |
||||
inflateSync @25 |
||||
uncompress @26 |
||||
zlibVersion @27 |
||||
gzprintf @28 |
||||
gzputc @29 |
||||
gzgetc @30 |
||||
gzseek @31 |
||||
gzrewind @32 |
||||
gztell @33 |
||||
gzeof @34 |
||||
gzsetparams @35 |
||||
zError @36 |
||||
inflateSyncPoint @37 |
||||
get_crc_table @38 |
||||
compress2 @39 |
||||
gzputs @40 |
||||
gzgets @41 |
||||
inflateCopy @42 |
||||
inflateBackInit_ @43 |
||||
inflateBack @44 |
||||
inflateBackEnd @45 |
||||
compressBound @46 |
||||
deflateBound @47 |
||||
gzclearerr @48 |
||||
gzungetc @49 |
||||
zlibCompileFlags @50 |
||||
deflatePrime @51 |
||||
|
||||
unzOpen @61 |
||||
unzClose @62 |
||||
unzGetGlobalInfo @63 |
||||
unzGetCurrentFileInfo @64 |
||||
unzGoToFirstFile @65 |
||||
unzGoToNextFile @66 |
||||
unzOpenCurrentFile @67 |
||||
unzReadCurrentFile @68 |
||||
unzOpenCurrentFile3 @69 |
||||
unztell @70 |
||||
unzeof @71 |
||||
unzCloseCurrentFile @72 |
||||
unzGetGlobalComment @73 |
||||
unzStringFileNameCompare @74 |
||||
unzLocateFile @75 |
||||
unzGetLocalExtrafield @76 |
||||
unzOpen2 @77 |
||||
unzOpenCurrentFile2 @78 |
||||
unzOpenCurrentFilePassword @79 |
||||
|
||||
zipOpen @80 |
||||
zipOpenNewFileInZip @81 |
||||
zipWriteInFileInZip @82 |
||||
zipCloseFileInZip @83 |
||||
zipClose @84 |
||||
zipOpenNewFileInZip2 @86 |
||||
zipCloseFileInZipRaw @87 |
||||
zipOpen2 @88 |
||||
zipOpenNewFileInZip3 @89 |
||||
|
||||
unzGetFilePos @100 |
||||
unzGoToFilePos @101 |
||||
|
||||
fill_win32_filefunc @110 |
||||
fill_win32_filefunc64 @111 |
||||
fill_win32_filefunc64A @112 |
||||
fill_win32_filefunc64W @113 |
||||
|
||||
; quick hack by hkuno@microhouse.co.jp |
||||
unzOpen64 @120 |
||||
unzOpen2_64 @121 |
||||
unzGetGlobalInfo64 @122 |
||||
unzGetCurrentFileInfo64 @124 |
||||
unzGetCurrentFileZStreamPos64 @125 |
||||
unztell64 @126 |
||||
unzGetFilePos64 @127 |
||||
unzGoToFilePos64 @128 |
||||
|
||||
zipOpen64 @130 |
||||
zipOpen2_64 @131 |
||||
zipOpenNewFileInZip64 @132 |
||||
zipOpenNewFileInZip2_64 @133 |
||||
zipOpenNewFileInZip3_64 @134 |
||||
zipOpenNewFileInZip4_64 @135 |
||||
zipCloseFileInZipRaw64 @136 |
||||
; end hack |
@ -0,0 +1,416 @@ |
||||
/* zconf.h -- configuration of the zlib compression library
|
||||
* Copyright (C) 1995-2010 Jean-loup Gailly. |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
/* @(#) $Id$ */ |
||||
|
||||
#ifndef ZCONF_H |
||||
#define ZCONF_H |
||||
|
||||
/*
|
||||
* If you *really* need a unique prefix for all types and library functions, |
||||
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. |
||||
* Even better than compiling with -DZ_PREFIX would be to use configure to set |
||||
* this permanently in zconf.h using "./configure --zprefix". |
||||
*/ |
||||
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ |
||||
|
||||
/* all linked symbols */ |
||||
# define _dist_code z__dist_code |
||||
# define _length_code z__length_code |
||||
# define _tr_align z__tr_align |
||||
# define _tr_flush_block z__tr_flush_block |
||||
# define _tr_init z__tr_init |
||||
# define _tr_stored_block z__tr_stored_block |
||||
# define _tr_tally z__tr_tally |
||||
# define adler32 z_adler32 |
||||
# define adler32_combine z_adler32_combine |
||||
# define adler32_combine64 z_adler32_combine64 |
||||
# define compress z_compress |
||||
# define compress2 z_compress2 |
||||
# define compressBound z_compressBound |
||||
# define crc32 z_crc32 |
||||
# define crc32_combine z_crc32_combine |
||||
# define crc32_combine64 z_crc32_combine64 |
||||
# define deflate z_deflate |
||||
# define deflateBound z_deflateBound |
||||
# define deflateCopy z_deflateCopy |
||||
# define deflateEnd z_deflateEnd |
||||
# define deflateInit2_ z_deflateInit2_ |
||||
# define deflateInit_ z_deflateInit_ |
||||
# define deflateParams z_deflateParams |
||||
# define deflatePrime z_deflatePrime |
||||
# define deflateReset z_deflateReset |
||||
# define deflateSetDictionary z_deflateSetDictionary |
||||
# define deflateSetHeader z_deflateSetHeader |
||||
# define deflateTune z_deflateTune |
||||
# define deflate_copyright z_deflate_copyright |
||||
# define get_crc_table z_get_crc_table |
||||
# define gz_error z_gz_error |
||||
# define gz_intmax z_gz_intmax |
||||
# define gz_strwinerror z_gz_strwinerror |
||||
# define gzbuffer z_gzbuffer |
||||
# define gzclearerr z_gzclearerr |
||||
# define gzclose z_gzclose |
||||
# define gzclose_r z_gzclose_r |
||||
# define gzclose_w z_gzclose_w |
||||
# define gzdirect z_gzdirect |
||||
# define gzdopen z_gzdopen |
||||
# define gzeof z_gzeof |
||||
# define gzerror z_gzerror |
||||
# define gzflush z_gzflush |
||||
# define gzgetc z_gzgetc |
||||
# define gzgets z_gzgets |
||||
# define gzoffset z_gzoffset |
||||
# define gzoffset64 z_gzoffset64 |
||||
# define gzopen z_gzopen |
||||
# define gzopen64 z_gzopen64 |
||||
# define gzprintf z_gzprintf |
||||
# define gzputc z_gzputc |
||||
# define gzputs z_gzputs |
||||
# define gzread z_gzread |
||||
# define gzrewind z_gzrewind |
||||
# define gzseek z_gzseek |
||||
# define gzseek64 z_gzseek64 |
||||
# define gzsetparams z_gzsetparams |
||||
# define gztell z_gztell |
||||
# define gztell64 z_gztell64 |
||||
# define gzungetc z_gzungetc |
||||
# define gzwrite z_gzwrite |
||||
# define inflate z_inflate |
||||
# define inflateBack z_inflateBack |
||||
# define inflateBackEnd z_inflateBackEnd |
||||
# define inflateBackInit_ z_inflateBackInit_ |
||||
# define inflateCopy z_inflateCopy |
||||
# define inflateEnd z_inflateEnd |
||||
# define inflateGetHeader z_inflateGetHeader |
||||
# define inflateInit2_ z_inflateInit2_ |
||||
# define inflateInit_ z_inflateInit_ |
||||
# define inflateMark z_inflateMark |
||||
# define inflatePrime z_inflatePrime |
||||
# define inflateReset z_inflateReset |
||||
# define inflateReset2 z_inflateReset2 |
||||
# define inflateSetDictionary z_inflateSetDictionary |
||||
# define inflateSync z_inflateSync |
||||
# define inflateSyncPoint z_inflateSyncPoint |
||||
# define inflateUndermine z_inflateUndermine |
||||
# define inflate_copyright z_inflate_copyright |
||||
# define inflate_fast z_inflate_fast |
||||
# define inflate_table z_inflate_table |
||||
# define uncompress z_uncompress |
||||
# define zError z_zError |
||||
# define zcalloc z_zcalloc |
||||
# define zcfree z_zcfree |
||||
# define zlibCompileFlags z_zlibCompileFlags |
||||
# define zlibVersion z_zlibVersion |
||||
|
||||
/* all zlib typedefs in zlib.h and zconf.h */ |
||||
# define Byte z_Byte |
||||
# define Bytef z_Bytef |
||||
# define alloc_func z_alloc_func |
||||
# define charf z_charf |
||||
# define free_func z_free_func |
||||
# define gzFile z_gzFile |
||||
# define gz_header z_gz_header |
||||
# define gz_headerp z_gz_headerp |
||||
# define in_func z_in_func |
||||
# define intf z_intf |
||||
# define out_func z_out_func |
||||
# define uInt z_uInt |
||||
# define uIntf z_uIntf |
||||
# define uLong z_uLong |
||||
# define uLongf z_uLongf |
||||
# define voidp z_voidp |
||||
# define voidpc z_voidpc |
||||
# define voidpf z_voidpf |
||||
|
||||
/* all zlib structs in zlib.h and zconf.h */ |
||||
# define gz_header_s z_gz_header_s |
||||
# define internal_state z_internal_state |
||||
|
||||
#endif |
||||
|
||||
#if defined(__MSDOS__) && !defined(MSDOS) |
||||
# define MSDOS |
||||
#endif |
||||
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) |
||||
# define OS2 |
||||
#endif |
||||
#if defined(_WINDOWS) && !defined(WINDOWS) |
||||
# define WINDOWS |
||||
#endif |
||||
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) |
||||
# ifndef WIN32 |
||||
# define WIN32 |
||||
# endif |
||||
#endif |
||||
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) |
||||
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) |
||||
# ifndef SYS16BIT |
||||
# define SYS16BIT |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
/*
|
||||
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more |
||||
* than 64k bytes at a time (needed on systems with 16-bit int). |
||||
*/ |
||||
#ifdef SYS16BIT |
||||
# define MAXSEG_64K |
||||
#endif |
||||
#ifdef MSDOS |
||||
# define UNALIGNED_OK |
||||
#endif |
||||
|
||||
#ifdef __STDC_VERSION__ |
||||
# ifndef STDC |
||||
# define STDC |
||||
# endif |
||||
# if __STDC_VERSION__ >= 199901L |
||||
# ifndef STDC99 |
||||
# define STDC99 |
||||
# endif |
||||
# endif |
||||
#endif |
||||
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) |
||||
# define STDC |
||||
#endif |
||||
|
||||
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ |
||||
# define STDC |
||||
#endif |
||||
|
||||
#ifndef STDC |
||||
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ |
||||
# define const /* note: need a more gentle solution here */ |
||||
# endif |
||||
#endif |
||||
|
||||
/* Some Mac compilers merge all .h files incorrectly: */ |
||||
#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) |
||||
# define NO_DUMMY_DECL |
||||
#endif |
||||
|
||||
/* Maximum value for memLevel in deflateInit2 */ |
||||
#ifndef MAX_MEM_LEVEL |
||||
# ifdef MAXSEG_64K |
||||
# define MAX_MEM_LEVEL 8 |
||||
# else |
||||
# define MAX_MEM_LEVEL 9 |
||||
# endif |
||||
#endif |
||||
|
||||
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
|
||||
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files |
||||
* created by gzip. (Files created by minigzip can still be extracted by |
||||
* gzip.) |
||||
*/ |
||||
#ifndef MAX_WBITS |
||||
# define MAX_WBITS 15 /* 32K LZ77 window */ |
||||
#endif |
||||
|
||||
/* The memory requirements for deflate are (in bytes):
|
||||
(1 << (windowBits+2)) + (1 << (memLevel+9)) |
||||
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) |
||||
plus a few kilobytes for small objects. For example, if you want to reduce |
||||
the default memory requirements from 256K to 128K, compile with |
||||
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" |
||||
Of course this will generally degrade compression (there's no free lunch). |
||||
|
||||
The memory requirements for inflate are (in bytes) 1 << windowBits |
||||
that is, 32K for windowBits=15 (default value) plus a few kilobytes |
||||
for small objects. |
||||
*/ |
||||
|
||||
/* Type declarations */ |
||||
|
||||
#ifndef OF /* function prototypes */ |
||||
# ifdef STDC |
||||
# define OF(args) args |
||||
# else |
||||
# define OF(args) () |
||||
# endif |
||||
#endif |
||||
|
||||
/* The following definitions for FAR are needed only for MSDOS mixed
|
||||
* model programming (small or medium model with some far allocations). |
||||
* This was tested only with MSC; for other MSDOS compilers you may have |
||||
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model, |
||||
* just define FAR to be empty. |
||||
*/ |
||||
#ifdef SYS16BIT |
||||
# if defined(M_I86SM) || defined(M_I86MM) |
||||
/* MSC small or medium model */ |
||||
# define SMALL_MEDIUM |
||||
# ifdef _MSC_VER |
||||
# define FAR _far |
||||
# else |
||||
# define FAR far |
||||
# endif |
||||
# endif |
||||
# if (defined(__SMALL__) || defined(__MEDIUM__)) |
||||
/* Turbo C small or medium model */ |
||||
# define SMALL_MEDIUM |
||||
# ifdef __BORLANDC__ |
||||
# define FAR _far |
||||
# else |
||||
# define FAR far |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#if defined(WINDOWS) || defined(WIN32) |
||||
/* If building or using zlib as a DLL, define ZLIB_DLL.
|
||||
* This is not mandatory, but it offers a little performance increase. |
||||
*/ |
||||
# ifdef ZLIB_DLL |
||||
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) |
||||
# ifdef ZLIB_INTERNAL |
||||
# define ZEXTERN extern __declspec(dllexport) |
||||
# else |
||||
# define ZEXTERN extern __declspec(dllimport) |
||||
# endif |
||||
# endif |
||||
# endif /* ZLIB_DLL */ |
||||
/* If building or using zlib with the WINAPI/WINAPIV calling convention,
|
||||
* define ZLIB_WINAPI. |
||||
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. |
||||
*/ |
||||
# ifdef ZLIB_WINAPI |
||||
# ifdef FAR |
||||
# undef FAR |
||||
# endif |
||||
# include <windows.h> |
||||
/* No need for _export, use ZLIB.DEF instead. */ |
||||
/* For complete Windows compatibility, use WINAPI, not __stdcall. */ |
||||
# define ZEXPORT WINAPI |
||||
# ifdef WIN32 |
||||
# define ZEXPORTVA WINAPIV |
||||
# else |
||||
# define ZEXPORTVA FAR CDECL |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#if defined (__BEOS__) |
||||
# ifdef ZLIB_DLL |
||||
# ifdef ZLIB_INTERNAL |
||||
# define ZEXPORT __declspec(dllexport) |
||||
# define ZEXPORTVA __declspec(dllexport) |
||||
# else |
||||
# define ZEXPORT __declspec(dllimport) |
||||
# define ZEXPORTVA __declspec(dllimport) |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#ifdef HAVE_VISIBILITY_PRAGMA |
||||
# define ZEXTERN __attribute__((visibility ("default"))) extern |
||||
#endif |
||||
|
||||
#ifndef ZEXTERN |
||||
# define ZEXTERN extern |
||||
#endif |
||||
#ifndef ZEXPORT |
||||
# define ZEXPORT |
||||
#endif |
||||
#ifndef ZEXPORTVA |
||||
# define ZEXPORTVA |
||||
#endif |
||||
|
||||
#ifndef FAR |
||||
# define FAR |
||||
#endif |
||||
|
||||
#if !defined(__MACTYPES__) |
||||
typedef unsigned char Byte; /* 8 bits */ |
||||
#endif |
||||
typedef unsigned int uInt; /* 16 bits or more */ |
||||
typedef unsigned long uLong; /* 32 bits or more */ |
||||
|
||||
#ifdef SMALL_MEDIUM |
||||
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ |
||||
# define Bytef Byte FAR |
||||
#else |
||||
typedef Byte FAR Bytef; |
||||
#endif |
||||
typedef char FAR charf; |
||||
typedef int FAR intf; |
||||
typedef uInt FAR uIntf; |
||||
typedef uLong FAR uLongf; |
||||
|
||||
#ifdef STDC |
||||
typedef void const *voidpc; |
||||
typedef void FAR *voidpf; |
||||
typedef void *voidp; |
||||
#else |
||||
typedef Byte const *voidpc; |
||||
typedef Byte FAR *voidpf; |
||||
typedef Byte *voidp; |
||||
#endif |
||||
|
||||
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ |
||||
# define Z_HAVE_UNISTD_H |
||||
#endif |
||||
|
||||
#ifdef Z_HAVE_UNISTD_H |
||||
# include <sys/types.h> /* for off_t */ |
||||
# include <unistd.h> /* for SEEK_* and off_t */ |
||||
# ifdef VMS |
||||
# include <unixio.h> /* for off_t */ |
||||
# endif |
||||
# ifndef z_off_t |
||||
# define z_off_t off_t |
||||
# endif |
||||
#endif |
||||
|
||||
#ifdef _LARGEFILE64_SOURCE |
||||
# include <sys/types.h> |
||||
#endif |
||||
|
||||
#ifndef SEEK_SET |
||||
# define SEEK_SET 0 /* Seek from beginning of file. */ |
||||
# define SEEK_CUR 1 /* Seek from current position. */ |
||||
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ |
||||
#endif |
||||
#ifndef z_off_t |
||||
# define z_off_t long |
||||
#endif |
||||
|
||||
#if defined(__OS400__) |
||||
# define NO_vsnprintf |
||||
#endif |
||||
|
||||
#if defined(__MVS__) |
||||
# define NO_vsnprintf |
||||
#endif |
||||
|
||||
/* MVS linker does not support external names larger than 8 bytes */ |
||||
#if defined(__MVS__) |
||||
#pragma map(deflateInit_,"DEIN") |
||||
#pragma map(deflateInit2_,"DEIN2") |
||||
#pragma map(deflateEnd,"DEEND") |
||||
#pragma map(deflateBound,"DEBND") |
||||
#pragma map(inflateInit_,"ININ") |
||||
#pragma map(inflateInit2_,"ININ2") |
||||
#pragma map(inflateEnd,"INEND") |
||||
#pragma map(inflateSync,"INSY") |
||||
#pragma map(inflateSetDictionary,"INSEDI") |
||||
#pragma map(compressBound,"CMBND") |
||||
#pragma map(inflate_table,"INTABL") |
||||
#pragma map(inflate_fast,"INFA") |
||||
#pragma map(inflate_copyright,"INCOPY") |
||||
#endif |
||||
|
||||
#endif /* ZCONF_H */ |
@ -0,0 +1,418 @@ |
||||
/* zconf.h -- configuration of the zlib compression library |
||||
* Copyright (C) 1995-2010 Jean-loup Gailly. |
||||
* For conditions of distribution and use, see copyright notice in zlib.h |
||||
*/ |
||||
|
||||
/* @(#) $Id$ */ |
||||
|
||||
#ifndef ZCONF_H |
||||
#define ZCONF_H |
||||
#cmakedefine Z_PREFIX |
||||
#cmakedefine Z_HAVE_UNISTD_H |
||||
|
||||
/* |
||||
* If you *really* need a unique prefix for all types and library functions, |
||||
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. |
||||
* Even better than compiling with -DZ_PREFIX would be to use configure to set |
||||
* this permanently in zconf.h using "./configure --zprefix". |
||||
*/ |
||||
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ |
||||
|
||||
/* all linked symbols */ |
||||
# define _dist_code z__dist_code |
||||
# define _length_code z__length_code |
||||
# define _tr_align z__tr_align |
||||
# define _tr_flush_block z__tr_flush_block |
||||
# define _tr_init z__tr_init |
||||
# define _tr_stored_block z__tr_stored_block |
||||
# define _tr_tally z__tr_tally |
||||
# define adler32 z_adler32 |
||||
# define adler32_combine z_adler32_combine |
||||
# define adler32_combine64 z_adler32_combine64 |
||||
# define compress z_compress |
||||
# define compress2 z_compress2 |
||||
# define compressBound z_compressBound |
||||
# define crc32 z_crc32 |
||||
# define crc32_combine z_crc32_combine |
||||
# define crc32_combine64 z_crc32_combine64 |
||||
# define deflate z_deflate |
||||
# define deflateBound z_deflateBound |
||||
# define deflateCopy z_deflateCopy |
||||
# define deflateEnd z_deflateEnd |
||||
# define deflateInit2_ z_deflateInit2_ |
||||
# define deflateInit_ z_deflateInit_ |
||||
# define deflateParams z_deflateParams |
||||
# define deflatePrime z_deflatePrime |
||||
# define deflateReset z_deflateReset |
||||
# define deflateSetDictionary z_deflateSetDictionary |
||||
# define deflateSetHeader z_deflateSetHeader |
||||
# define deflateTune z_deflateTune |
||||
# define deflate_copyright z_deflate_copyright |
||||
# define get_crc_table z_get_crc_table |
||||
# define gz_error z_gz_error |
||||
# define gz_intmax z_gz_intmax |
||||
# define gz_strwinerror z_gz_strwinerror |
||||
# define gzbuffer z_gzbuffer |
||||
# define gzclearerr z_gzclearerr |
||||
# define gzclose z_gzclose |
||||
# define gzclose_r z_gzclose_r |
||||
# define gzclose_w z_gzclose_w |
||||
# define gzdirect z_gzdirect |
||||
# define gzdopen z_gzdopen |
||||
# define gzeof z_gzeof |
||||
# define gzerror z_gzerror |
||||
# define gzflush z_gzflush |
||||
# define gzgetc z_gzgetc |
||||
# define gzgets z_gzgets |
||||
# define gzoffset z_gzoffset |
||||
# define gzoffset64 z_gzoffset64 |
||||
# define gzopen z_gzopen |
||||
# define gzopen64 z_gzopen64 |
||||
# define gzprintf z_gzprintf |
||||
# define gzputc z_gzputc |
||||
# define gzputs z_gzputs |
||||
# define gzread z_gzread |
||||
# define gzrewind z_gzrewind |
||||
# define gzseek z_gzseek |
||||
# define gzseek64 z_gzseek64 |
||||
# define gzsetparams z_gzsetparams |
||||
# define gztell z_gztell |
||||
# define gztell64 z_gztell64 |
||||
# define gzungetc z_gzungetc |
||||
# define gzwrite z_gzwrite |
||||
# define inflate z_inflate |
||||
# define inflateBack z_inflateBack |
||||
# define inflateBackEnd z_inflateBackEnd |
||||
# define inflateBackInit_ z_inflateBackInit_ |
||||
# define inflateCopy z_inflateCopy |
||||
# define inflateEnd z_inflateEnd |
||||
# define inflateGetHeader z_inflateGetHeader |
||||
# define inflateInit2_ z_inflateInit2_ |
||||
# define inflateInit_ z_inflateInit_ |
||||
# define inflateMark z_inflateMark |
||||
# define inflatePrime z_inflatePrime |
||||
# define inflateReset z_inflateReset |
||||
# define inflateReset2 z_inflateReset2 |
||||
# define inflateSetDictionary z_inflateSetDictionary |
||||
# define inflateSync z_inflateSync |
||||
# define inflateSyncPoint z_inflateSyncPoint |
||||
# define inflateUndermine z_inflateUndermine |
||||
# define inflate_copyright z_inflate_copyright |
||||
# define inflate_fast z_inflate_fast |
||||
# define inflate_table z_inflate_table |
||||
# define uncompress z_uncompress |
||||
# define zError z_zError |
||||
# define zcalloc z_zcalloc |
||||
# define zcfree z_zcfree |
||||
# define zlibCompileFlags z_zlibCompileFlags |
||||
# define zlibVersion z_zlibVersion |
||||
|
||||
/* all zlib typedefs in zlib.h and zconf.h */ |
||||
# define Byte z_Byte |
||||
# define Bytef z_Bytef |
||||
# define alloc_func z_alloc_func |
||||
# define charf z_charf |
||||
# define free_func z_free_func |
||||
# define gzFile z_gzFile |
||||
# define gz_header z_gz_header |
||||
# define gz_headerp z_gz_headerp |
||||
# define in_func z_in_func |
||||
# define intf z_intf |
||||
# define out_func z_out_func |
||||
# define uInt z_uInt |
||||
# define uIntf z_uIntf |
||||
# define uLong z_uLong |
||||
# define uLongf z_uLongf |
||||
# define voidp z_voidp |
||||
# define voidpc z_voidpc |
||||
# define voidpf z_voidpf |
||||
|
||||
/* all zlib structs in zlib.h and zconf.h */ |
||||
# define gz_header_s z_gz_header_s |
||||
# define internal_state z_internal_state |
||||
|
||||
#endif |
||||
|
||||
#if defined(__MSDOS__) && !defined(MSDOS) |
||||
# define MSDOS |
||||
#endif |
||||
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) |
||||
# define OS2 |
||||
#endif |
||||
#if defined(_WINDOWS) && !defined(WINDOWS) |
||||
# define WINDOWS |
||||
#endif |
||||
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) |
||||
# ifndef WIN32 |
||||
# define WIN32 |
||||
# endif |
||||
#endif |
||||
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) |
||||
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) |
||||
# ifndef SYS16BIT |
||||
# define SYS16BIT |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
/* |
||||
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more |
||||
* than 64k bytes at a time (needed on systems with 16-bit int). |
||||
*/ |
||||
#ifdef SYS16BIT |
||||
# define MAXSEG_64K |
||||
#endif |
||||
#ifdef MSDOS |
||||
# define UNALIGNED_OK |
||||
#endif |
||||
|
||||
#ifdef __STDC_VERSION__ |
||||
# ifndef STDC |
||||
# define STDC |
||||
# endif |
||||
# if __STDC_VERSION__ >= 199901L |
||||
# ifndef STDC99 |
||||
# define STDC99 |
||||
# endif |
||||
# endif |
||||
#endif |
||||
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) |
||||
# define STDC |
||||
#endif |
||||
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) |
||||
# define STDC |
||||
#endif |
||||
|
||||
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ |
||||
# define STDC |
||||
#endif |
||||
|
||||
#ifndef STDC |
||||
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ |
||||
# define const /* note: need a more gentle solution here */ |
||||
# endif |
||||
#endif |
||||
|
||||
/* Some Mac compilers merge all .h files incorrectly: */ |
||||
#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) |
||||
# define NO_DUMMY_DECL |
||||
#endif |
||||
|
||||
/* Maximum value for memLevel in deflateInit2 */ |
||||
#ifndef MAX_MEM_LEVEL |
||||
# ifdef MAXSEG_64K |
||||
# define MAX_MEM_LEVEL 8 |
||||
# else |
||||
# define MAX_MEM_LEVEL 9 |
||||
# endif |
||||
#endif |
||||
|
||||
/* Maximum value for windowBits in deflateInit2 and inflateInit2. |
||||
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files |
||||
* created by gzip. (Files created by minigzip can still be extracted by |
||||
* gzip.) |
||||
*/ |
||||
#ifndef MAX_WBITS |
||||
# define MAX_WBITS 15 /* 32K LZ77 window */ |
||||
#endif |
||||
|
||||
/* The memory requirements for deflate are (in bytes): |
||||
(1 << (windowBits+2)) + (1 << (memLevel+9)) |
||||
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) |
||||
plus a few kilobytes for small objects. For example, if you want to reduce |
||||
the default memory requirements from 256K to 128K, compile with |
||||
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" |
||||
Of course this will generally degrade compression (there's no free lunch). |
||||
|
||||
The memory requirements for inflate are (in bytes) 1 << windowBits |
||||
that is, 32K for windowBits=15 (default value) plus a few kilobytes |
||||
for small objects. |
||||
*/ |
||||
|
||||
/* Type declarations */ |
||||
|
||||
#ifndef OF /* function prototypes */ |
||||
# ifdef STDC |
||||
# define OF(args) args |
||||
# else |
||||
# define OF(args) () |
||||
# endif |
||||
#endif |
||||
|
||||
/* The following definitions for FAR are needed only for MSDOS mixed |
||||
* model programming (small or medium model with some far allocations). |
||||
* This was tested only with MSC; for other MSDOS compilers you may have |
||||
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model, |
||||
* just define FAR to be empty. |
||||
*/ |
||||
#ifdef SYS16BIT |
||||
# if defined(M_I86SM) || defined(M_I86MM) |
||||
/* MSC small or medium model */ |
||||
# define SMALL_MEDIUM |
||||
# ifdef _MSC_VER |
||||
# define FAR _far |
||||
# else |
||||
# define FAR far |
||||
# endif |
||||
# endif |
||||
# if (defined(__SMALL__) || defined(__MEDIUM__)) |
||||
/* Turbo C small or medium model */ |
||||
# define SMALL_MEDIUM |
||||
# ifdef __BORLANDC__ |
||||
# define FAR _far |
||||
# else |
||||
# define FAR far |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#if defined(WINDOWS) || defined(WIN32) |
||||
/* If building or using zlib as a DLL, define ZLIB_DLL. |
||||
* This is not mandatory, but it offers a little performance increase. |
||||
*/ |
||||
# ifdef ZLIB_DLL |
||||
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) |
||||
# ifdef ZLIB_INTERNAL |
||||
# define ZEXTERN extern __declspec(dllexport) |
||||
# else |
||||
# define ZEXTERN extern __declspec(dllimport) |
||||
# endif |
||||
# endif |
||||
# endif /* ZLIB_DLL */ |
||||
/* If building or using zlib with the WINAPI/WINAPIV calling convention, |
||||
* define ZLIB_WINAPI. |
||||
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. |
||||
*/ |
||||
# ifdef ZLIB_WINAPI |
||||
# ifdef FAR |
||||
# undef FAR |
||||
# endif |
||||
# include <windows.h> |
||||
/* No need for _export, use ZLIB.DEF instead. */ |
||||
/* For complete Windows compatibility, use WINAPI, not __stdcall. */ |
||||
# define ZEXPORT WINAPI |
||||
# ifdef WIN32 |
||||
# define ZEXPORTVA WINAPIV |
||||
# else |
||||
# define ZEXPORTVA FAR CDECL |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#if defined (__BEOS__) |
||||
# ifdef ZLIB_DLL |
||||
# ifdef ZLIB_INTERNAL |
||||
# define ZEXPORT __declspec(dllexport) |
||||
# define ZEXPORTVA __declspec(dllexport) |
||||
# else |
||||
# define ZEXPORT __declspec(dllimport) |
||||
# define ZEXPORTVA __declspec(dllimport) |
||||
# endif |
||||
# endif |
||||
#endif |
||||
|
||||
#ifdef HAVE_VISIBILITY_PRAGMA |
||||
# define ZEXTERN __attribute__((visibility ("default"))) extern |
||||
#endif |
||||
|
||||
#ifndef ZEXTERN |
||||
# define ZEXTERN extern |
||||
#endif |
||||
#ifndef ZEXPORT |
||||
# define ZEXPORT |
||||
#endif |
||||
#ifndef ZEXPORTVA |
||||
# define ZEXPORTVA |
||||
#endif |
||||
|
||||
#ifndef FAR |
||||
# define FAR |
||||
#endif |
||||
|
||||
#if !defined(__MACTYPES__) |
||||
typedef unsigned char Byte; /* 8 bits */ |
||||
#endif |
||||
typedef unsigned int uInt; /* 16 bits or more */ |
||||
typedef unsigned long uLong; /* 32 bits or more */ |
||||
|
||||
#ifdef SMALL_MEDIUM |
||||
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ |
||||
# define Bytef Byte FAR |
||||
#else |
||||
typedef Byte FAR Bytef; |
||||
#endif |
||||
typedef char FAR charf; |
||||
typedef int FAR intf; |
||||
typedef uInt FAR uIntf; |
||||
typedef uLong FAR uLongf; |
||||
|
||||
#ifdef STDC |
||||
typedef void const *voidpc; |
||||
typedef void FAR *voidpf; |
||||
typedef void *voidp; |
||||
#else |
||||
typedef Byte const *voidpc; |
||||
typedef Byte FAR *voidpf; |
||||
typedef Byte *voidp; |
||||
#endif |
||||
|
||||
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ |
||||
# define Z_HAVE_UNISTD_H |
||||
#endif |
||||
|
||||
#ifdef Z_HAVE_UNISTD_H |
||||
# include <sys/types.h> /* for off_t */ |
||||
# include <unistd.h> /* for SEEK_* and off_t */ |
||||
# ifdef VMS |
||||
# include <unixio.h> /* for off_t */ |
||||
# endif |
||||
# ifndef z_off_t |
||||
# define z_off_t off_t |
||||
# endif |
||||
#endif |
||||
|
||||
#ifdef _LARGEFILE64_SOURCE |
||||
# include <sys/types.h> |
||||
#endif |
||||
|
||||
#ifndef SEEK_SET |
||||
# define SEEK_SET 0 /* Seek from beginning of file. */ |
||||
# define SEEK_CUR 1 /* Seek from current position. */ |
||||
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ |
||||
#endif |
||||
#ifndef z_off_t |
||||
# define z_off_t long |
||||
#endif |
||||
|
||||
#if defined(__OS400__) |
||||
# define NO_vsnprintf |
||||
#endif |
||||
|
||||
#if defined(__MVS__) |
||||
# define NO_vsnprintf |
||||
#endif |
||||
|
||||
/* MVS linker does not support external names larger than 8 bytes */ |
||||
#if defined(__MVS__) |
||||
#pragma map(deflateInit_,"DEIN") |
||||
#pragma map(deflateInit2_,"DEIN2") |
||||
#pragma map(deflateEnd,"DEEND") |
||||
#pragma map(deflateBound,"DEBND") |
||||
#pragma map(inflateInit_,"ININ") |
||||
#pragma map(inflateInit2_,"ININ2") |
||||
#pragma map(inflateEnd,"INEND") |
||||
#pragma map(inflateSync,"INSY") |
||||
#pragma map(inflateSetDictionary,"INSEDI") |
||||
#pragma map(compressBound,"CMBND") |
||||
#pragma map(inflate_table,"INTABL") |
||||
#pragma map(inflate_fast,"INFA") |
||||
#pragma map(inflate_copyright,"INCOPY") |
||||
#endif |
||||
|
||||
#endif /* ZCONF_H */ |
Binary file not shown.
Loading…
Reference in new issue