; 75 "inffast.S" |
;FILE "inffast.S" |
;;;GLOBAL _inflate_fast |
;;;SECTION .text |
; ---------------------------------------------------------------------------- |
; Assembler setup, the MMX selector variable, the public entry label and the |
; constant data (credit string, error messages, bit-mask table). |
; ---------------------------------------------------------------------------- |
; Enable Pentium-class (privileged) and MMX instruction encodings. |
.586p |
.mmx |
name inflate_fast_x86 |
; NOTE(review): no "_DATA ends" is visible before "_TEXT segment" below, so the |
; two segments are nested from this view - confirm the assembler accepts that |
; and that _DATA is closed somewhere outside the lines shown here. |
_DATA segment |
; Selector examined by L_check_mmx and L_break_loop: |
;   2         run the MMX decode loop |
;   above 2   run the integer decode loop (the probe stores 3) |
;   below 2   not decided yet (initial value 1): probe the CPU first |
inflate_fast_use_mmx: |
dd 1 |
_TEXT segment |
PUBLIC _inflate_fast |
; Exported entry point: jump over the embedded data to the real prologue. |
_inflate_fast: |
jmp inflate_fast_entry |
; Credit string; nothing in the visible code references it. |
db 'Fast decoding Code from Chris Anderson' |
db 0 |
; Zero-terminated error texts. The error handlers store the address of one of |
; these at [strm+24] before leaving the decode loop. |
invalid_literal_length_code_msg: |
db 'invalid literal/length code' |
db 0 |
invalid_distance_code_msg: |
db 'invalid distance code' |
db 0 |
invalid_distance_too_far_msg: |
db 'invalid distance too far back' |
db 0 |
; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32 (33 dwords). The MMX path |
; indexes it as [inflate_fast_mask+eax*4] to keep the low n bits of a value. |
inflate_fast_mask: |
dd 0 |
dd 1 |
dd 3 |
dd 7 |
dd 15 |
dd 31 |
dd 63 |
dd 127 |
dd 255 |
dd 511 |
dd 1023 |
dd 2047 |
dd 4095 |
dd 8191 |
dd 16383 |
dd 32767 |
dd 65535 |
dd 131071 |
dd 262143 |
dd 524287 |
dd 1048575 |
dd 2097151 |
dd 4194303 |
dd 8388607 |
dd 16777215 |
dd 33554431 |
dd 67108863 |
dd 134217727 |
dd 268435455 |
dd 536870911 |
dd 1073741823 |
dd 2147483647 |
dd 4294967295 |
;;SECTION .text |
; 205 "inffast.S" |
;GLOBAL inflate_fast_use_mmx |
;SECTION .data |
; GLOBAL inflate_fast_use_mmx:object |
;.size inflate_fast_use_mmx, 4 |
; 226 "inffast.S" |
;SECTION .text |
; ---------------------------------------------------------------------------- |
; inflate_fast_entry - prologue, local frame setup and input alignment. |
; |
; Saves edi/esi/ebp/ebx and EFLAGS (20 bytes) and reserves 64 bytes of locals, |
; so with a 4-byte return address the two stack arguments sit at [esp+88] and |
; [esp+92]. Below, "strm" is the first argument and "state" is [strm+28]. |
; NOTE(review): the offsets used look like zlib's z_stream (next_in +0, |
; avail_in +4, next_out +12, avail_out +16, msg +24, state +28) and an older |
; inflate_state layout - confirm against the zlib headers this is built with. |
; |
; Local frame (offsets from esp after "sub esp,64"): |
;   [esp+0]   (1 << [state+72]) - 1, index mask for the length table |
;   [esp+4]   (1 << [state+76]) - 1, index mask for the distance table |
;   [esp+8]   [state+64], base of the length table |
;   [esp+12]  [state+68], base of the distance table |
;   [esp+16]  output limit  = [strm+12] + [strm+16] - 257 |
;   [esp+20]  input limit   = [strm+0] + [strm+4] - 11 |
;   [esp+24]  match length being copied (integer loop only) |
;   [esp+28]  12-byte scratch input buffer, ends just before [esp+40] |
;   [esp+40]  output start  = [strm+12] - (arg2 - [strm+16]) |
;   [esp+44]  saved input pointer |
;   [esp+48]  [state+36]    [esp+52]  [state+32]    [esp+56]  [state+40] |
;   [esp+60]  [strm+12], the output pointer on entry |
; |
; Registers on leaving this block: esi = input pointer, edi = output pointer, |
; ebp = bit accumulator ([state+44]), ebx = number of valid bits ([state+48]). |
; ---------------------------------------------------------------------------- |
inflate_fast_entry: |
push edi |
push esi |
push ebp |
push ebx |
pushfd |
sub esp,64 |
; lodsw / stosb / rep movsb below rely on the direction flag being clear. |
cld |
mov esi, [esp+88] |
mov edi, [esi+28] |
; Input bounds: remember the input pointer and compute the input limit. |
mov edx, [esi+4] |
mov eax, [esi+0] |
add edx,eax |
sub edx,11 |
mov [esp+44],eax |
mov [esp+20],edx |
; Output bounds: ebp = output start, ecx = output limit (see frame map). |
mov ebp, [esp+92] |
mov ecx, [esi+16] |
mov ebx, [esi+12] |
sub ebp,ecx |
neg ebp |
add ebp,ebx |
sub ecx,257 |
add ecx,ebx |
mov [esp+60],ebx |
mov [esp+40],ebp |
mov [esp+16],ecx |
; 285 "inffast.S" |
; Cache the two table bases and build the index masks from the bit counts. |
mov eax, [edi+64] |
mov ecx, [edi+68] |
mov [esp+8],eax |
mov [esp+12],ecx |
mov eax,1 |
mov ecx, [edi+72] |
shl eax,cl |
dec eax |
mov [esp+0],eax |
mov eax,1 |
mov ecx, [edi+76] |
shl eax,cl |
dec eax |
mov [esp+4],eax |
; Cache the three window-related state fields used by the clip-window code. |
mov eax, [edi+32] |
mov ecx, [edi+36] |
mov edx, [edi+40] |
mov [esp+52],eax |
mov [esp+48],ecx |
mov [esp+56],edx |
; Load the bit accumulator and its bit count. |
mov ebp, [edi+44] |
mov ebx, [edi+48] |
; 321 "inffast.S" |
mov esi, [esp+44] |
mov ecx, [esp+20] |
cmp ecx,esi |
ja L_align_long |
; Short input (limit not above the pointer, i.e. [strm+4] <= 11): copy the |
; remaining [strm+4] bytes into the scratch buffer, zero-fill it up to 12 |
; bytes and decode from there. Storing the buffer address as the input limit |
; is what L_update_next_in later tests to detect that the buffer was used. |
add ecx,11 |
sub ecx,esi |
mov eax,12 |
sub eax,ecx |
lea edi, [esp+28] |
rep movsb |
mov ecx,eax |
xor eax,eax |
rep stosb |
lea esi, [esp+28] |
mov [esp+20],esi |
jmp L_is_aligned |
; Feed single bytes into the accumulator (8 bits each, placed above the bits |
; already held) until the input pointer is a multiple of 4. |
L_align_long: |
test esi,3 |
jz L_is_aligned |
xor eax,eax |
mov al, [esi] |
inc esi |
mov ecx,ebx |
add ebx,8 |
shl eax,cl |
or ebp,eax |
jmp L_align_long |
; edi was used as scratch above; reload the output pointer. |
L_is_aligned: |
mov edi, [esp+60] |
; 366 "inffast.S" |
; ---------------------------------------------------------------------------- |
; L_check_mmx - pick the decode loop from inflate_fast_use_mmx. |
;   == 2  -> L_init_mmx (MMX loop) |
;   >  2  -> L_do_loop  (integer loop) |
;   <  2  -> probe the CPU, store 2 or 3 in inflate_fast_use_mmx, then re-test. |
; The probe selects MMX only for a CPUID-capable "GenuineIntel" part whose |
; family field is 6 and whose MMX feature bit is set. eax/ebx/ecx/edx are |
; preserved around the probe because they already hold decoder state. |
; ---------------------------------------------------------------------------- |
L_check_mmx: |
cmp dword ptr [inflate_fast_use_mmx],2 |
je L_init_mmx |
ja L_do_loop |
push eax |
push ebx |
push ecx |
push edx |
; CPUID presence test: try to flip EFLAGS bit 21 (the ID flag). eax keeps the |
; original flags, edx receives the flags after the attempted toggle. |
pushfd |
mov eax, [esp] |
xor dword ptr [esp],0200000h |
popfd |
pushfd |
pop edx |
; No difference means the bit could not be changed: CPUID is unavailable. |
xor edx,eax |
jz L_dont_use_mmx |
; CPUID leaf 0: vendor string in ebx:edx:ecx must read "Genu" "ineI" "ntel". |
xor eax,eax |
cpuid |
cmp ebx,0756e6547h |
jne L_dont_use_mmx |
cmp ecx,06c65746eh |
jne L_dont_use_mmx |
cmp edx,049656e69h |
jne L_dont_use_mmx |
; CPUID leaf 1: bits 8..11 of eax are the family; require family 6. |
mov eax,1 |
cpuid |
shr eax,8 |
and eax,15 |
cmp eax,6 |
jne L_dont_use_mmx |
; Feature flags in edx: bit 23 reports MMX support. |
test edx,0800000h |
jnz L_use_mmx |
jmp L_dont_use_mmx |
L_use_mmx: |
mov dword ptr [inflate_fast_use_mmx],2 |
jmp L_check_mmx_pop |
L_dont_use_mmx: |
mov dword ptr [inflate_fast_use_mmx],3 |
; Restore the decoder registers and dispatch again with the decided value. |
L_check_mmx_pop: |
pop edx |
pop ecx |
pop ebx |
pop eax |
jmp L_check_mmx |
; 426 "inffast.S" |
; ---------------------------------------------------------------------------- |
; L_do_loop - integer decode loop: fetch a length/literal table entry. |
; Registers: esi = input, edi = output, ebp = bit accumulator, bl = bit count. |
; A table entry is one dword used as: al = op, ah = bits to drop, upper 16 |
; bits = value. |
; ---------------------------------------------------------------------------- |
L_do_loop: |
; 437 "inffast.S" |
; Refill: with 15 or fewer bits left, append the next 16 input bits. |
cmp bl,15 |
ja L_get_length_code |
xor eax,eax |
lodsw |
mov cl,bl |
add bl,16 |
shl eax,cl |
or ebp,eax |
; Look up the entry indexed by the low bits of the accumulator ([esp+0] mask). |
L_get_length_code: |
mov edx, [esp+0] |
mov ecx, [esp+8] |
and edx,ebp |
mov eax, [ecx+edx*4] |
; Drop the entry's bit count from the accumulator; op == 0 means a literal. |
L_dolen: |
mov cl,ah |
sub bl,ah |
shr ebp,cl |
test al,al |
jnz L_test_for_length_base |
; Literal: the low byte of the entry's value is written to the output. |
shr eax,16 |
stosb |
; Continue only while output < output limit and input < input limit. |
L_while_test: |
cmp [esp+16],edi |
jbe L_break_loop |
cmp [esp+20],esi |
ja L_do_loop |
jmp L_break_loop |
; ---------------------------------------------------------------------------- |
; L_test_for_length_base - non-literal length entry (eax = table entry). |
; op bit 16 set: the value is a base length and op & 15 is the number of |
; extra bits to add. Otherwise the entry is handled by |
; L_test_for_second_level_length. The final length is stored at [esp+24]. |
; ---------------------------------------------------------------------------- |
L_test_for_length_base: |
; 502 "inffast.S" |
; edx = entry value (base length), cl = op. |
mov edx,eax |
shr edx,16 |
mov cl,al |
test al,16 |
jz L_test_for_second_level_length |
and cl,15 |
jz L_save_len |
cmp bl,cl |
jae L_add_bits_to_len |
; Not enough bits buffered: append 16 more. ch preserves the extra-bit count |
; while cl is used as the shift amount. |
mov ch,cl |
xor eax,eax |
lodsw |
mov cl,bl |
add bl,16 |
shl eax,cl |
or ebp,eax |
mov cl,ch |
; len += accumulator & ((1 << cl) - 1), then consume those cl bits. |
L_add_bits_to_len: |
mov eax,1 |
shl eax,cl |
dec eax |
sub bl,cl |
and eax,ebp |
shr ebp,cl |
add edx,eax |
L_save_len: |
mov [esp+24],edx |
; ---------------------------------------------------------------------------- |
; L_decode_distance - integer loop: decode the match distance into edx. |
; In:  ebp = bit accumulator, bl = bit count, esi = input, [esp+24] = length. |
; Out: edx = distance; control continues at L_check_window (by fall-through |
;      after the extra bits are added) or at L_check_dist_one when the entry |
;      carries no extra bits. |
; A table entry is one dword used as: al = op, ah = bits to drop, upper 16 |
; bits = value. |
; ---------------------------------------------------------------------------- |
L_decode_distance: |
; 549 "inffast.S" |
; Refill: with 15 or fewer bits left, append the next 16 input bits. |
cmp bl,15 |
ja L_get_distance_code |
xor eax,eax |
lodsw |
mov cl,bl |
add bl,16 |
shl eax,cl |
or ebp,eax |
; Look up the entry indexed by the low bits of the accumulator ([esp+4] mask). |
L_get_distance_code: |
mov edx, [esp+4] |
mov ecx, [esp+12] |
and edx,ebp |
mov eax, [ecx+edx*4] |
; edx = entry value (base distance); drop the entry's bits from the accumulator. |
L_dodist: |
mov edx,eax |
shr edx,16 |
mov cl,ah |
sub bl,ah |
shr ebp,cl |
; 584 "inffast.S" |
; op bit 16 clear: not a base distance, see L_test_for_second_level_dist. |
mov cl,al |
test al,16 |
jz L_test_for_second_level_dist |
and cl,15 |
jz L_check_dist_one |
cmp bl,cl |
jae L_add_bits_to_dist |
; Not enough bits buffered: append 16 more. ch preserves the extra-bit count |
; while cl is used as the shift amount. |
mov ch,cl |
xor eax,eax |
lodsw |
mov cl,bl |
add bl,16 |
shl eax,cl |
or ebp,eax |
mov cl,ch |
; dist += accumulator & ((1 << cl) - 1), then consume those cl bits. |
L_add_bits_to_dist: |
mov eax,1 |
shl eax,cl |
dec eax |
sub bl,cl |
and eax,ebp |
shr ebp,cl |
add edx,eax |
; Falls through into L_check_window, which immediately follows this block (the |
; former "jmp L_check_window" targeted the very next instruction). |
; ---------------------------------------------------------------------------- |
; L_check_window - integer loop: copy a match that lies entirely inside the |
; output produced so far. |
; In: edx = distance, [esp+24] = length, edi = output, esi = input. |
; If the distance exceeds (output - output start) the copy needs bytes from |
; the window and is handled by L_clip_window instead. |
; ---------------------------------------------------------------------------- |
L_check_window: |
; 625 "inffast.S" |
; esi is about to be reused as the copy source, so park the input pointer. |
mov [esp+44],esi |
mov eax,edi |
sub eax, [esp+40] |
cmp eax,edx |
jb L_clip_window |
; Source = output - distance. The first three bytes are moved by hand and the |
; remaining length - 3 by rep movsb (dl is free: the distance is not needed |
; again). NOTE(review): this assumes length >= 3, otherwise ecx underflows - |
; presumably guaranteed by the deflate minimum match length; confirm. |
mov ecx, [esp+24] |
mov esi,edi |
sub esi,edx |
sub ecx,3 |
mov al, [esi] |
mov [edi],al |
mov al, [esi+1] |
mov dl, [esi+2] |
add esi,3 |
mov [edi+1],al |
mov [edi+2],dl |
add edi,3 |
rep movsb |
; Restore the input pointer and go back to the loop test. |
mov esi, [esp+44] |
jmp L_while_test |
; ---------------------------------------------------------------------------- |
; L_check_dist_one - integer loop: special case for distance == 1. |
; Reached when the distance entry has no extra bits. A distance of 1 with at |
; least one byte already written in this call repeats the previous output byte |
; "length" times, done with stores instead of a byte-by-byte copy. Any other |
; case goes to the generic L_check_window. |
; ---------------------------------------------------------------------------- |
L_check_dist_one: |
cmp edx,1 |
jne L_check_window |
; Output still at its start: the previous byte is not in this output run. |
cmp [esp+40],edi |
je L_check_window |
; al = previous output byte; write it three times, then length - 3 more with |
; rep stosb. NOTE(review): assumes length >= 3 - confirm. |
dec edi |
mov ecx, [esp+24] |
mov al, [edi] |
sub ecx,3 |
mov [edi+1],al |
mov [edi+2],al |
mov [edi+3],al |
add edi,4 |
rep stosb |
jmp L_while_test |
; ---------------------------------------------------------------------------- |
; Second-level table lookups for the integer loop. |
; In: al = cl = op of the first-level entry (bit 16 clear), edx = its value, |
;     ebp = bit accumulator. |
; With op bit 64 clear the next entry is fetched from |
;   table[value + (accumulator & ((1 << cl) - 1))] |
; and decoding resumes at L_dolen / L_dodist, which drop that entry's bits. |
; ---------------------------------------------------------------------------- |
L_test_for_second_level_length: |
; op bit 64 set: end-of-block or invalid code, decided by the handler. |
test al,64 |
jnz L_test_for_end_of_block |
mov eax,1 |
shl eax,cl |
dec eax |
and eax,ebp |
add eax,edx |
mov edx, [esp+8] |
mov eax, [edx+eax*4] |
jmp L_dolen |
L_test_for_second_level_dist: |
; op bit 64 set on a distance entry is always reported as an invalid code. |
test al,64 |
jnz L_invalid_distance_code |
mov eax,1 |
shl eax,cl |
dec eax |
and eax,ebp |
add eax,edx |
mov edx, [esp+12] |
mov eax, [edx+eax*4] |
jmp L_dodist |
; ---------------------------------------------------------------------------- |
; L_clip_window - integer loop: the match starts before the output produced in |
; this call, so its first bytes come from the window buffer. |
; In:  eax = output - output start, edx = distance, [esp+24] = length, |
;      edi = output, [esp+44] = saved input pointer. |
; Locals read: [esp+52] and [esp+48] (copied from [state+32] / [state+36]) and |
;      [esp+56], the window base (copied from [state+40]). |
; Out: esi = copy source, eax = bytes still to copy; continues at L_do_copy1, |
;      L_wrap_around_window or L_invalid_distance_too_far. |
; An unreachable duplicate of the final compare/copy sequence that used to |
; follow the unconditional "jmp L_do_copy1" has been removed; no label led |
; into it. |
; ---------------------------------------------------------------------------- |
L_clip_window: |
; 721 "inffast.S" |
; ecx = -(output - output start); esi = window base. |
mov ecx,eax |
mov eax, [esp+52] |
neg ecx |
mov esi, [esp+56] |
; A distance larger than [esp+52] is rejected as "too far back". |
cmp eax,edx |
jb L_invalid_distance_too_far |
; ecx = distance - (output - output start) = bytes needed from the window. |
add ecx,edx |
; Non-zero [esp+48]: the window data may wrap, handled separately. |
cmp dword ptr [esp+48],0 |
jne L_wrap_around_window |
; Source = window base + [esp+52] - ecx (the last ecx bytes of the window). |
sub eax,ecx |
add esi,eax |
; 749 "inffast.S" |
; If the whole match fits in those ecx bytes, L_do_copy1 copies it in one go. |
mov eax, [esp+24] |
cmp eax,ecx |
jbe L_do_copy1 |
; Otherwise copy the ecx window bytes now and take the remaining |
; (length - ecx) bytes from the output itself at output - distance. |
sub eax,ecx |
rep movsb |
mov esi,edi |
sub esi,edx |
jmp L_do_copy1 |
; ---------------------------------------------------------------------------- |
; L_wrap_around_window - integer loop: window copy when [esp+48] is non-zero. |
; In: ecx = bytes needed from the window, esi = window base, edx = distance, |
;     edi = output. Below, W = [esp+48] and S = [esp+52]. |
; If ecx <= W the source is one contiguous run (L_contiguous_in_window). |
; Otherwise up to three pieces are copied: the tail of the window starting at |
; base + S + W - ecx, then the first W bytes of the window, then the rest from |
; the output at output - distance. Each stage ends early via L_do_copy1 as |
; soon as the remaining length fits. |
; ---------------------------------------------------------------------------- |
L_wrap_around_window: |
; 793 "inffast.S" |
mov eax, [esp+48] |
cmp ecx,eax |
jbe L_contiguous_in_window |
; Piece 1: (ecx - W) bytes from the end of the window. |
add esi, [esp+52] |
add esi,eax |
sub esi,ecx |
sub ecx,eax |
mov eax, [esp+24] |
cmp eax,ecx |
jbe L_do_copy1 |
sub eax,ecx |
rep movsb |
; Piece 2: W bytes from the start of the window. |
mov esi, [esp+56] |
mov ecx, [esp+48] |
cmp eax,ecx |
jbe L_do_copy1 |
sub eax,ecx |
rep movsb |
; Piece 3: whatever is left comes from the output at output - distance. |
mov esi,edi |
sub esi,edx |
jmp L_do_copy1 |
; ---------------------------------------------------------------------------- |
; L_contiguous_in_window - integer loop: the needed window bytes form a single |
; run that ends at window base + [esp+48]. |
; In: eax = [esp+48], ecx = bytes needed from the window, esi = window base, |
;     edx = distance, edi = output. |
; L_do_copy1 is the shared tail of every clip-window path: it copies eax |
; bytes from esi, restores the input pointer and returns to the loop test. |
; ---------------------------------------------------------------------------- |
L_contiguous_in_window: |
; 836 "inffast.S" |
; Source = window base + [esp+48] - ecx. |
add esi,eax |
sub esi,ecx |
mov eax, [esp+24] |
cmp eax,ecx |
jbe L_do_copy1 |
; Match is longer than the window part: copy ecx bytes, then continue from |
; the output at output - distance with the remaining length in eax. |
sub eax,ecx |
rep movsb |
mov esi,edi |
sub esi,edx |
L_do_copy1: |
; 862 "inffast.S" |
mov ecx,eax |
rep movsb |
mov esi, [esp+44] |
jmp L_while_test |
; 878 "inffast.S" |
; ---------------------------------------------------------------------------- |
; L_init_mmx - move the decoder state into the MMX-loop register layout. |
; In:  ebp = bit accumulator, ebx = bit count (integer-loop layout). |
; Out: mm0 = bit accumulator, ebp = bit count, |
;      mm4 = length index mask ([esp+0]) with mm3 as its pristine copy, |
;      mm5 = distance index mask ([esp+4]) with mm2 as its pristine copy, |
;      mm1 = 0 (shift count applied to mm0 at the top of the MMX loop), |
;      ebx = length table base ([esp+8]). |
; Falls through into L_do_loop_mmx, which immediately follows this block (the |
; former "jmp L_do_loop_mmx" targeted the very next instruction). |
; ---------------------------------------------------------------------------- |
L_init_mmx: |
emms |
movd mm0,ebp |
mov ebp,ebx |
; 896 "inffast.S" |
movd mm4,[esp+0] |
movq mm3,mm4 |
movd mm5,[esp+4] |
movq mm2,mm5 |
pxor mm1,mm1 |
mov ebx, [esp+8] |
; ---------------------------------------------------------------------------- |
; L_do_loop_mmx - MMX decode loop: fetch a length/literal table entry. |
; Registers: esi = input, edi = output, mm0 = bit accumulator, ebp = bit |
; count, mm1 = bits of the previous entry not yet shifted out of mm0, |
; ebx = length table base, mm4/mm3 = length index mask and its copy. |
; A table entry is one dword used as: al = op, ah = bits to drop, upper 16 |
; bits = value. |
; ---------------------------------------------------------------------------- |
L_do_loop_mmx: |
; Apply the shift left pending by the previous entry. |
psrlq mm0,mm1 |
; Refill: with 32 or fewer bits left, append the next 32 input bits. |
cmp ebp,32 |
ja L_get_length_code_mmx |
movd mm6,ebp |
movd mm7,[esi] |
add esi,4 |
psllq mm7,mm6 |
add ebp,32 |
por mm0,mm7 |
; Index = accumulator & mask; mm4 is clobbered by pand and restored from mm3. |
L_get_length_code_mmx: |
pand mm4,mm0 |
movd eax,mm4 |
movq mm4,mm3 |
mov eax, [ebx+eax*4] |
; Record the entry's bit count as the pending shift and update the bit count. |
L_dolen_mmx: |
movzx ecx,ah |
movd mm1,ecx |
sub ebp,ecx |
test al,al |
jnz L_test_for_length_base_mmx |
; Literal: the low byte of the entry's value is written to the output. |
shr eax,16 |
stosb |
; Continue only while output < output limit and input < input limit. |
L_while_test_mmx: |
cmp [esp+16],edi |
jbe L_break_loop |
cmp [esp+20],esi |
ja L_do_loop_mmx |
jmp L_break_loop |
; ---------------------------------------------------------------------------- |
; L_test_for_length_base_mmx - non-literal length entry (eax = table entry). |
; Leaves the match length in edx. op bit 16 set: the value is a base length |
; and op & 15 is the number of extra bits; otherwise see |
; L_test_for_second_level_length_mmx. |
; ---------------------------------------------------------------------------- |
L_test_for_length_base_mmx: |
mov edx,eax |
shr edx,16 |
test al,16 |
jz L_test_for_second_level_length_mmx |
and eax,15 |
jz L_decode_distance_mmx |
; Apply the pending shift, make the extra-bit count the new pending shift, |
; and add the low eax bits of the accumulator to the length. |
psrlq mm0,mm1 |
movd mm1,eax |
movd ecx,mm0 |
sub ebp,eax |
and ecx, [inflate_fast_mask+eax*4] |
add edx,ecx |
; ---------------------------------------------------------------------------- |
; L_decode_distance_mmx - MMX loop: decode the match distance into ebx. |
; In:  edx = length, mm0 = bit accumulator, mm1 = pending shift, ebp = bit |
;      count, mm5/mm2 = distance index mask and its copy. |
; Out: ebx = distance (ebx no longer holds the length table base; the copy |
;      paths reload it from [esp+8] before returning to the loop). |
; ---------------------------------------------------------------------------- |
L_decode_distance_mmx: |
; Apply the pending shift, then refill when 32 or fewer bits remain. |
psrlq mm0,mm1 |
cmp ebp,32 |
ja L_get_dist_code_mmx |
movd mm6,ebp |
movd mm7,[esi] |
add esi,4 |
psllq mm7,mm6 |
add ebp,32 |
por mm0,mm7 |
; Index = accumulator & mask; mm5 is clobbered by pand and restored from mm2. |
L_get_dist_code_mmx: |
mov ebx, [esp+12] |
pand mm5,mm0 |
movd eax,mm5 |
movq mm5,mm2 |
mov eax, [ebx+eax*4] |
; ebx = entry value (base distance); the entry's bit count becomes the |
; pending shift. |
L_dodist_mmx: |
movzx ecx,ah |
mov ebx,eax |
shr ebx,16 |
sub ebp,ecx |
movd mm1,ecx |
test al,16 |
jz L_test_for_second_level_dist_mmx |
and eax,15 |
jz L_check_dist_one_mmx |
; Apply the pending shift, make the extra-bit count the new pending shift, |
; and add the low eax bits of the accumulator to the distance. |
L_add_bits_to_dist_mmx: |
psrlq mm0,mm1 |
movd mm1,eax |
movd ecx,mm0 |
sub ebp,eax |
and ecx, [inflate_fast_mask+eax*4] |
add ebx,ecx |
; ---------------------------------------------------------------------------- |
; L_check_window_mmx - MMX loop: copy a match that lies entirely inside the |
; output produced so far. |
; In: ebx = distance, edx = length, edi = output, esi = input. |
; If the distance exceeds (output - output start) the copy needs bytes from |
; the window and is handled by L_clip_window_mmx instead. |
; ---------------------------------------------------------------------------- |
L_check_window_mmx: |
; esi is about to be reused as the copy source, so park the input pointer. |
mov [esp+44],esi |
mov eax,edi |
sub eax, [esp+40] |
cmp eax,ebx |
jb L_clip_window_mmx |
; Source = output - distance. The first three bytes are moved by hand (dl is |
; free once the length is in ecx) and the remaining length - 3 by rep movsb. |
; NOTE(review): assumes length >= 3, otherwise ecx underflows - confirm. |
mov ecx,edx |
mov esi,edi |
sub esi,ebx |
sub ecx,3 |
mov al, [esi] |
mov [edi],al |
mov al, [esi+1] |
mov dl, [esi+2] |
add esi,3 |
mov [edi+1],al |
mov [edi+2],dl |
add edi,3 |
rep movsb |
; Restore the input pointer and the length table base for the next iteration. |
mov esi, [esp+44] |
mov ebx, [esp+8] |
jmp L_while_test_mmx |
; ---------------------------------------------------------------------------- |
; L_check_dist_one_mmx - MMX loop: special case for distance == 1. |
; Reached when the distance entry has no extra bits. A distance of 1 with at |
; least one byte already written in this call repeats the previous output byte |
; "length" (edx) times using stores. Any other case goes to the generic |
; L_check_window_mmx. |
; ---------------------------------------------------------------------------- |
L_check_dist_one_mmx: |
cmp ebx,1 |
jne L_check_window_mmx |
; Output still at its start: the previous byte is not in this output run. |
cmp [esp+40],edi |
je L_check_window_mmx |
; al = previous output byte; write it three times, then length - 3 more with |
; rep stosb. NOTE(review): assumes length >= 3 - confirm. |
dec edi |
mov ecx,edx |
mov al, [edi] |
sub ecx,3 |
mov [edi+1],al |
mov [edi+2],al |
mov [edi+3],al |
add edi,4 |
rep stosb |
; ebx held the distance; reload the length table base for the next iteration. |
mov ebx, [esp+8] |
jmp L_while_test_mmx |
; ---------------------------------------------------------------------------- |
; Second-level table lookups for the MMX loop. |
; In: al = op of the first-level entry (bit 16 clear), mm0 = bit accumulator, |
;     mm1 = pending shift; the first-level value is in edx (length case, with |
;     ebx = length table base) or in ebx (distance case). |
; With op bit 64 clear the next entry is fetched from |
;   table[value + (accumulator & inflate_fast_mask[op & 15])] |
; after the pending shift has been applied, and decoding resumes at |
; L_dolen_mmx / L_dodist_mmx, which record that entry's bit count. |
; ---------------------------------------------------------------------------- |
L_test_for_second_level_length_mmx: |
; op bit 64 set: end-of-block or invalid code, decided by the handler. |
test al,64 |
jnz L_test_for_end_of_block |
and eax,15 |
psrlq mm0,mm1 |
movd ecx,mm0 |
and ecx, [inflate_fast_mask+eax*4] |
add ecx,edx |
mov eax, [ebx+ecx*4] |
jmp L_dolen_mmx |
L_test_for_second_level_dist_mmx: |
; op bit 64 set on a distance entry is always reported as an invalid code. |
test al,64 |
jnz L_invalid_distance_code |
and eax,15 |
psrlq mm0,mm1 |
movd ecx,mm0 |
and ecx, [inflate_fast_mask+eax*4] |
; eax is free once the mask has been applied; reuse it for the table base. |
mov eax, [esp+12] |
add ecx,ebx |
mov eax, [eax+ecx*4] |
jmp L_dodist_mmx |
; ---------------------------------------------------------------------------- |
; L_clip_window_mmx - MMX loop: the match starts before the output produced in |
; this call, so its first bytes come from the window buffer. |
; In:  eax = output - output start, ebx = distance, edx = length, |
;      edi = output, [esp+44] = saved input pointer. |
; Locals read: [esp+52] and [esp+48] (copied from [state+32] / [state+36]) and |
;      [esp+56], the window base (copied from [state+40]). |
; Out: esi = copy source, edx = bytes still to copy; continues at |
;      L_do_copy1_mmx, L_wrap_around_window_mmx or L_invalid_distance_too_far. |
; An unreachable duplicate of the final compare/copy sequence that used to |
; follow the unconditional "jmp L_do_copy1_mmx" has been removed; no label led |
; into it. |
; ---------------------------------------------------------------------------- |
L_clip_window_mmx: |
; ecx = -(output - output start); esi = window base. |
mov ecx,eax |
mov eax, [esp+52] |
neg ecx |
mov esi, [esp+56] |
; A distance larger than [esp+52] is rejected as "too far back". |
cmp eax,ebx |
jb L_invalid_distance_too_far |
; ecx = distance - (output - output start) = bytes needed from the window. |
add ecx,ebx |
; Non-zero [esp+48]: the window data may wrap, handled separately. |
cmp dword ptr [esp+48],0 |
jne L_wrap_around_window_mmx |
; Source = window base + [esp+52] - ecx (the last ecx bytes of the window). |
sub eax,ecx |
add esi,eax |
; If the whole match fits in those ecx bytes, L_do_copy1_mmx copies it at once. |
cmp edx,ecx |
jbe L_do_copy1_mmx |
; Otherwise copy the ecx window bytes now and take the remaining |
; (length - ecx) bytes from the output itself at output - distance. |
sub edx,ecx |
rep movsb |
mov esi,edi |
sub esi,ebx |
jmp L_do_copy1_mmx |
; ---------------------------------------------------------------------------- |
; L_wrap_around_window_mmx - MMX loop: window copy when [esp+48] is non-zero. |
; In: ecx = bytes needed from the window, esi = window base, ebx = distance, |
;     edx = length, edi = output. Below, W = [esp+48] and S = [esp+52]. |
; If ecx <= W the source is one contiguous run (L_contiguous_in_window_mmx). |
; Otherwise up to three pieces are copied: the tail of the window starting at |
; base + S + W - ecx, then the first W bytes of the window, then the rest from |
; the output at output - distance. Each stage ends early via L_do_copy1_mmx |
; as soon as the remaining length fits. |
; ---------------------------------------------------------------------------- |
L_wrap_around_window_mmx: |
mov eax, [esp+48] |
cmp ecx,eax |
jbe L_contiguous_in_window_mmx |
; Piece 1: (ecx - W) bytes from the end of the window. |
add esi, [esp+52] |
add esi,eax |
sub esi,ecx |
sub ecx,eax |
cmp edx,ecx |
jbe L_do_copy1_mmx |
sub edx,ecx |
rep movsb |
; Piece 2: W bytes from the start of the window. |
mov esi, [esp+56] |
mov ecx, [esp+48] |
cmp edx,ecx |
jbe L_do_copy1_mmx |
sub edx,ecx |
rep movsb |
; Piece 3: whatever is left comes from the output at output - distance. |
mov esi,edi |
sub esi,ebx |
jmp L_do_copy1_mmx |
; ---------------------------------------------------------------------------- |
; L_contiguous_in_window_mmx - MMX loop: the needed window bytes form a single |
; run that ends at window base + [esp+48]. |
; In: eax = [esp+48], ecx = bytes needed from the window, esi = window base, |
;     ebx = distance, edx = length, edi = output. |
; L_do_copy1_mmx is the shared tail of every MMX clip-window path: it copies |
; edx bytes from esi, restores the input pointer and the length table base, |
; and returns to the loop test. |
; ---------------------------------------------------------------------------- |
L_contiguous_in_window_mmx: |
; Source = window base + [esp+48] - ecx. |
add esi,eax |
sub esi,ecx |
cmp edx,ecx |
jbe L_do_copy1_mmx |
; Match is longer than the window part: copy ecx bytes, then continue from |
; the output at output - distance with the remaining length in edx. |
sub edx,ecx |
rep movsb |
mov esi,edi |
sub esi,ebx |
L_do_copy1_mmx: |
mov ecx,edx |
rep movsb |
mov esi, [esp+44] |
mov ebx, [esp+8] |
jmp L_while_test_mmx |
; 1174 "inffast.S" |
; ---------------------------------------------------------------------------- |
; Exit handlers shared by the integer and MMX loops. |
; Each handler loads ecx = address of an error message (or 0 for none) and |
; edx = the value to store at [state+0], then L_update_stream_state writes |
; them: ecx to [strm+24] when non-zero, edx to [state+0]. Control then falls |
; through into L_break_loop, which immediately follows this block. |
; NOTE(review): 26 and 11 are presumably the BAD and TYPE inflate modes of the |
; zlib version this was generated from - confirm against its inflate.h. |
; Message addresses are taken with an explicit "offset" so the operand is |
; unambiguously the label's address, not the dword stored at the label. |
; ---------------------------------------------------------------------------- |
L_invalid_distance_code: |
mov ecx, offset invalid_distance_code_msg |
mov edx,26 |
jmp L_update_stream_state |
; Length entry with op bit 64 set: bit 32 marks end of block (no message), |
; anything else is an invalid literal/length code. |
L_test_for_end_of_block: |
test al,32 |
jz L_invalid_literal_length_code |
xor ecx,ecx |
mov edx,11 |
jmp L_update_stream_state |
L_invalid_literal_length_code: |
mov ecx, offset invalid_literal_length_code_msg |
mov edx,26 |
jmp L_update_stream_state |
; Reached from the clip-window code, where esi already holds the window base; |
; restore the input pointer saved at [esp+44] first. |
L_invalid_distance_too_far: |
mov esi, [esp+44] |
mov ecx, offset invalid_distance_too_far_msg |
mov edx,26 |
L_update_stream_state: |
mov eax, [esp+88] |
test ecx,ecx |
jz L_skip_msg |
mov [eax+24],ecx |
L_skip_msg: |
mov eax, [eax+28] |
mov [eax+0],edx |
; ---------------------------------------------------------------------------- |
; L_break_loop - common exit: write the decoder state back. |
; In: esi = input pointer, edi = output pointer; bit count in ebx (integer |
;     loop) or ebp (MMX loop); accumulator in ebp (integer) or mm0 (MMX). |
; Stores: [strm+12] = output pointer, [state+48] = bit count modulo 8, |
;     [strm+0] = input pointer backed up over unused whole bytes, |
;     [state+44] = accumulator masked to the remaining bits, and [strm+4] = |
;     bytes left between the input pointer and the input limit plus 11. |
; ---------------------------------------------------------------------------- |
L_break_loop: |
; 1243 "inffast.S" |
; The MMX loop keeps the bit count in ebp; move it to ebx for the shared code. |
cmp dword ptr [inflate_fast_use_mmx],2 |
jne L_update_next_in |
mov ebx,ebp |
L_update_next_in: |
; 1266 "inffast.S" |
; eax = strm, edx = state. Give back every whole unused byte: |
; esi -= bits / 8, bits -= (bits / 8) * 8. |
mov eax, [esp+88] |
mov ecx,ebx |
mov edx, [eax+28] |
shr ecx,3 |
sub esi,ecx |
shl ecx,3 |
sub ebx,ecx |
mov [eax+12],edi |
mov [edx+48],ebx |
; Keep the remaining bit count in cl for the mask computed below. |
mov ecx,ebx |
; If the input limit equals the scratch buffer address, the short-input buffer |
; was in use: translate esi back to an offset from [strm+0] and recompute the |
; input limit as [strm+0] + [strm+4] - 11. |
lea ebx, [esp+28] |
cmp [esp+20],ebx |
jne L_buf_not_used |
sub esi,ebx |
mov ebx, [eax+0] |
mov [esp+20],ebx |
add esi,ebx |
mov ebx, [eax+4] |
sub ebx,11 |
add [esp+20],ebx |
L_buf_not_used: |
mov [eax+0],esi |
; ebx = (1 << remaining bits) - 1. |
mov ebx,1 |
shl ebx,cl |
dec ebx |
; MMX loop: apply the pending shift, take the low 32 bits of the accumulator |
; and leave MMX state with emms. |
cmp dword ptr [inflate_fast_use_mmx],2 |
jne L_update_hold |
psrlq mm0,mm1 |
movd ebp,mm0 |
emms |
L_update_hold: |
and ebp,ebx |
mov [edx+44],ebp |
; [strm+4] = input limit - input pointer + 11; the two branches compute the |
; same value without letting the intermediate subtraction go below zero. |
mov ebx, [esp+20] |
cmp ebx,esi |
jbe L_last_is_smaller |
sub ebx,esi |
add ebx,11 |
mov [eax+4],ebx |
jmp L_fixup_out |
L_last_is_smaller: |
sub esi,ebx |
neg esi |
add esi,11 |
mov [eax+4],esi |
; ---------------------------------------------------------------------------- |
; L_fixup_out / L_done - store the remaining output space and return. |
; In: eax = strm, edi = output pointer, [esp+16] = output limit. |
; [strm+16] = output limit - output pointer + 257; the two branches compute |
; the same value without letting the intermediate subtraction go below zero. |
; ---------------------------------------------------------------------------- |
L_fixup_out: |
mov ebx, [esp+16] |
cmp ebx,edi |
jbe L_end_is_smaller |
sub ebx,edi |
add ebx,257 |
mov [eax+16],ebx |
jmp L_done |
L_end_is_smaller: |
sub edi,ebx |
neg edi |
add edi,257 |
mov [eax+16],edi |
; Epilogue: release the 64-byte frame and restore EFLAGS and the four saved |
; registers in the reverse order of the prologue pushes. |
L_done: |
add esp,64 |
popfd |
pop ebx |
pop ebp |
pop esi |
pop edi |
ret |
_TEXT ends |