@ -41,8 +41,8 @@ SECTION .text
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;-----------------------------------------------------------------------------
% macro AC3_EXPONENT_MIN 1
cglobal ac3_exponent_min_ % 1 , 3 , 4 , 2 , exp , reuse_blks , expn , offset
% macro AC3_EXPONENT_MIN 0
cglobal ac3_exponent_min , 3 , 4 , 2 , exp , reuse_blks , expn , offset
shl reuse_blksq , 8
jz .end
LOOP_ALIGN
@ -65,16 +65,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
; Instantiate the AC3_EXPONENT_MIN macro once per instruction set.
; PMINUB and LOOP_ALIGN are redefined before each variant and removed again
; at the end of this run.
; NOTE(review): every variant below is invoked in two forms -- once with the
; instruction-set name passed as a macro argument after a bare INIT_MMX /
; INIT_XMM, and once with no argument after INIT_MMX/INIT_XMM <name>. This
; looks like the old and new spelling of the same call listed together;
; confirm which form is meant to be assembled.

; MMX: PMINUB maps to PMINUB_MMX, LOOP_ALIGN expands to nothing.
% define PMINUB PMINUB_MMX
% define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN mmx
INIT_MMX mmx
AC3_EXPONENT_MIN
; MMXEXT: PMINUB maps to PMINUB_MMXEXT and the loop is aligned to 16 bytes.
% if HAVE_MMXEXT_EXTERNAL
% define PMINUB PMINUB_MMXEXT
% define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
INIT_MMX mmxext
AC3_EXPONENT_MIN
% endif
; SSE2: nothing is redefined here, so this variant uses whichever PMINUB and
; LOOP_ALIGN definitions are still in effect from above.
% if HAVE_SSE2_EXTERNAL
INIT_XMM
AC3_EXPONENT_MIN ss e2
INIT_XMM ss e2
AC3_EXPONENT_MIN
% endif
; Drop the helper defines so they do not leak into later code.
% undef PMINUB
% undef LOOP_ALIGN
@ -168,8 +169,8 @@ AC3_MAX_MSB_ABS_INT16 or_abs
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
;-----------------------------------------------------------------------------
; Shared body for the shift functions; only the start of the macro is visible
; in this listing.
; NOTE(review): two headers are listed back to back (4-parameter and
; 3-parameter). They look like the old and new form of the same macro;
; confirm which one is meant to be assembled.
% macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
cglobal ac3_ % 1 shift_int % 2 _ % 4 , 3 , 3 , 5 , src , len , shift
% macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
cglobal ac3_ % 1 shift_int % 2 , 3 , 3 , 5 , src , len , shift
; m0 = shift count, taken from the 32-bit 'shift' argument
movd m0 , shiftd
.loop:
; load one vector of samples from src
mova m1 , [ srcq ]
@ -195,19 +196,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
; Left shift of 16-bit samples (psllw), built for MMX and for SSE2.
; NOTE(review): each build is listed in two forms -- instruction set passed as
; the fourth macro argument after a bare INIT_MMX/INIT_XMM, and instruction
; set given to INIT_MMX/INIT_XMM with a three-argument macro call. These look
; like the old and new spelling of the same instantiation; confirm which one
; is meant to be assembled.
INIT_MMX
AC3_SHIFT l , 16 , psllw , mmx
INIT_XMM
AC3_SHIFT l , 16 , psllw , ss e2
INIT_MMX mmx
AC3_SHIFT l , 16 , psllw
INIT_XMM ss e2
AC3_SHIFT l , 16 , psllw
;-----------------------------------------------------------------------------
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
; Arithmetic right shift of 32-bit samples (psrad), built for MMX and for
; SSE2. The same duplication of invocation forms applies to these four
; instantiations.
INIT_MMX
AC3_SHIFT r , 32 , psrad , mmx
INIT_XMM
AC3_SHIFT r , 32 , psrad , ss e2
INIT_MMX mmx
AC3_SHIFT r , 32 , psrad
INIT_XMM ss e2
AC3_SHIFT r , 32 , psrad
;-----------------------------------------------------------------------------
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
@ -215,8 +216,8 @@ AC3_SHIFT r, 32, psrad, sse2
; The 3DNow! version is not bit-identical because pf2id uses truncation rather
; than round-to-nearest.
INIT_MMX
cglobal float_to_fixed24_3dnow , 3 , 3 , 0 , ds t , src , len
INIT_MMX 3 dnow
cglobal float_to_fixed24 , 3 , 3 , 0 , ds t , src , len
movq m0 , [ pf_1_24 ]
.loop:
movq m1 , [ srcq ]
@ -242,8 +243,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len
femms
RET
INIT_XMM
cglobal float_to_fixed24_sse , 3 , 3 , 3 , ds t , src , len
INIT_XMM ss e
cglobal float_to_fixed24 , 3 , 3 , 3 , ds t , src , len
movaps m0 , [ pf_1_24 ]
.loop:
movaps m1 , [ srcq ]
@ -267,8 +268,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
emms
RET
INIT_XMM
cglobal float_to_fixed24_sse2 , 3 , 3 , 9 , ds t , src , len
INIT_XMM ss e2
cglobal float_to_fixed24 , 3 , 3 , 9 , ds t , src , len
movaps m0 , [ pf_1_24 ]
.loop:
movaps m1 , [ srcq ]
@ -332,8 +333,8 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
paddd % 1 , % 2
% endmacro
INIT_XMM
cglobal ac3_compute_mantissa_size_sse2 , 1 , 2 , 4 , mant_cnt , sum
INIT_XMM ss e2
cglobal ac3_compute_mantissa_size , 1 , 2 , 4 , mant_cnt , sum
movdqa m0 , [ mant_cntq ]
movdqa m1 , [ mant_cntq + 1 * 16 ]
paddw m0 , [ mant_cntq + 2 * 16 ]
@ -373,20 +374,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
;------------------------------------------------------------------------------
; Per-lane absolute value of packed 32-bit integers, in place.
; %1 = source and destination register, %2 = scratch register (only touched
; by the fallback sequence, which does not rely on the pabsd instruction).
; NOTE(review): this listing opens two macros (PABSD_MMX and PABSD) before the
; first %endmacro and later opens PABSD_SSSE3 inside the %if/%else. It looks
; like the separate PABSD_MMX / PABSD_SSSE3 macros and the single
; cpuflag-dispatching PABSD macro are shown interleaved; confirm which
; definition is meant to be assembled.
% macro PABSD_MMX 2 ; src/dst, tmp
% macro PABSD 1-2 ; src/dst, tmp (tmp is only used when SSSE3 is unavailable)
% if cpuflag(ssse3)
pabsd % 1 , % 1
% else ; src/dst, tmp
; Fallback: build a sign mask, then compute (x ^ mask) - mask.
; tmp = 0
pxor % 2 , % 2
; tmp = all ones in every lane where 0 > src (negative lanes), else 0
pcmpgtd % 2 , % 1
; invert the bits of the negative lanes, leave the others untouched
pxor % 1 , % 2
; subtract -1 (i.e. add 1) in the negative lanes, completing the negation
psubd % 1 , % 2
% endmacro
% macro PABSD_SSSE3 1-2 ; src/dst, unused
pabsd % 1 , % 1
% endif
% endmacro
% if HAVE_AMD3DNOW_EXTERNAL
INIT_MMX
cglobal ac3_extract_exponents_3dnow , 3 , 3 , 0 , exp , coef , len
INIT_MMX 3 dnow
cglobal ac3_extract_exponents , 3 , 3 , 0 , exp , coef , len
add expq , lenq
lea coefq , [ coefq + 4 * lenq ]
neg lenq
@ -395,8 +396,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
.loop:
movq m0 , [ coefq + 4 * lenq ]
movq m1 , [ coefq + 4 * lenq + 8 ]
PABSD_MMX m0 , m2
PABSD_MMX m1 , m2
PABSD m0 , m2
PABSD m1 , m2
pslld m0 , 1
por m0 , m3
pi2fd m2 , m0
@ -420,8 +421,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
REP_RET
% endif
% macro AC3_EXTRACT_EXPONENTS 1
cglobal ac3_extract_exponents_ % 1 , 3 , 3 , 4 , exp , coef , len
% macro AC3_EXTRACT_EXPONENTS 0
cglobal ac3_extract_exponents , 3 , 3 , 4 , exp , coef , len
add expq , lenq
lea coefq , [ coefq + 4 * lenq ]
neg lenq
@ -453,11 +454,10 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
% endmacro
; Instantiate the AC3_EXTRACT_EXPONENTS macro for SSE2 and SSSE3.
; NOTE(review): as listed, this run has three %if lines but only two %endif
; lines, and it mixes two ways of choosing the absolute-value helper: binding
; PABSD with %define to PABSD_MMX / PABSD_SSSE3 and passing the instruction
; set as a macro argument, versus naming the instruction set on INIT_XMM and
; calling the macro with no argument. These look like the old and new form of
; the same instantiations shown together; confirm which one is meant to be
; assembled.
% if HAVE_SSE2_EXTERNAL
INIT_XMM
; SSE2 build: PABSD bound to the PABSD_MMX helper
% define PABSD PABSD_MMX
AC3_EXTRACT_EXPONENTS ss e2
% if HAVE_SSSE3_EXTERNAL
; SSSE3 build: PABSD bound to the PABSD_SSSE3 helper
% define PABSD PABSD_SSSE3
AC3_EXTRACT_EXPONENTS ss se3
; SSE2 build, instruction set selected through INIT_XMM
INIT_XMM ss e2
AC3_EXTRACT_EXPONENTS
% endif
% if HAVE_SSSE3_EXTERNAL
; SSSE3 build, instruction set selected through INIT_XMM
INIT_XMM ss se3
AC3_EXTRACT_EXPONENTS
% endif