@ -35,7 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; used in ff_ac3_extract_exponents()
pd_1: times 4 dd 1
pd_151: times 4 dd 151
pb_shuf_4dwb: db 0 , 4 , 8 , 12
SECTION .text
@ -404,15 +403,12 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
% endif
% macro AC3_EXTRACT_EXPONENTS 1
cglobal ac3_extract_exponents_ % 1 , 3 , 3 , 5 , exp , coef , len
cglobal ac3_extract_exponents_ % 1 , 3 , 3 , 4 , exp , coef , len
add expq , lenq
lea coefq , [ coefq + 4 * lenq ]
neg lenq
mova m2 , [ pd_1 ]
mova m3 , [ pd_151 ]
% ifidn %1, ssse3 ;
movd m4 , [ pb_shuf_4dwb ]
% endif
.loop:
; move 4 32-bit coefs to xmm0
mova m0 , [ coefq + 4 * lenq ]
@ -426,12 +422,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
mova m0 , m3
psubd m0 , m1
; move the lowest byte in each of 4 dwords to the low dword
% ifidn %1, ssse3
pshufb m0 , m4
% else
; NOTE: We cannot just extract the low bytes with pshufb because the dword
; result for 16777215 is -1 due to float inaccuracy. Using packuswb
; clips this to 0, which is the correct exponent.
packssdw m0 , m0
packuswb m0 , m0
% endif
movd [ expq + lenq ], m0
add lenq , 4