swr: mix_2_1_int16_mmx/sse

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/59/head
Michael Niedermayer 13 years ago
parent d504266cef
commit 728f86edfc
  1. 2
      libswresample/rematrix.c
  2. 71
      libswresample/x86/rematrix.asm
  3. 2
      libswresample/x86/swresample_x86.c

@ -393,7 +393,7 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
int in_i1 = s->matrix_ch[out_i][1]; int in_i1 = s->matrix_ch[out_i][1];
int in_i2 = s->matrix_ch[out_i][2]; int in_i2 = s->matrix_ch[out_i][2];
if(s->mix_2_1_simd && len1) if(s->mix_2_1_simd && len1)
s->mix_2_1_simd(out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); s->mix_2_1_simd(out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_simd_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
else else
s->mix_2_1_f (out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); s->mix_2_1_f (out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
if(len != len1) if(len != len1)

@ -158,9 +158,78 @@ mix_1_1_int16_u_int %+ SUFFIX
%endif %endif
%endmacro %endmacro
; MIX2_INT16 <a|u>
;
; Emits the function mix_2_1_<a|u>_int16 for the currently selected
; instruction set (mmsize / SUFFIX come from the preceding INIT_* macro).
; For every 16-bit element i it computes
;     out[i] = saturate_int16((in1[i]*c1 + in2[i]*c2 + bias) >> shift)
; where c1, c2, shift and bias are derived from the 32-bit table entries at
; coeffp[index1] and coeffp[index2] (see the setup code below).
;
; %1 == a : "aligned" entry point; it checks in1/in2/out against mmsize-1 and
;           jumps to the unaligned version's label if any of them is not
;           aligned, otherwise it uses mova for all loads/stores.
; %1 == u : "unaligned" version; uses movu and defines the label that the
;           aligned version jumps to, so it has to be instantiated for the
;           same SUFFIX as well.
;
; Arguments (x86inc cglobal: 7 args, 7 GPRs, 8 vector registers):
;     out, in1, in2  - output and the two input buffers
;     coeffp         - table of 32-bit entries, indexed by index1 / index2
;     index1, index2 - entry indices for in1 and in2 respectively
;     len            - element count; doubled below to get a byte count
;
; The loop has no tail handling: it always runs at least once and consumes
; 2*mmsize bytes per buffer per iteration, so len is presumably expected to
; be a non-zero multiple of mmsize elements -- TODO confirm against caller.
;
; NOTE(review): index1q/index2q are used as full-width registers in the
; address computation; confirm the C prototype passes pointer-sized integers.
; NOTE(review): when instantiated with mmsize == 16, the integer operations
; used here on XMM registers (punpck*wd, pmaddwd, paddd, psrad, packssdw) are
; SSE2 instructions; confirm the CPU-flag check that selects this version.
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
; Aligned variant: fall back to the unaligned code path unless all three
; buffers are mmsize-aligned.
test in1q, mmsize-1
jne mix_2_1_int16_u_int %+ SUFFIX
test in2q, mmsize-1
jne mix_2_1_int16_u_int %+ SUFFIX
test outq, mmsize-1
jne mix_2_1_int16_u_int %+ SUFFIX
%else
; Unaligned variant: this bare identifier is the label (written without a
; trailing colon) that the aligned variant branches to.
mix_2_1_int16_u_int %+ SUFFIX
%endif
; Load the 32-bit table entries for both inputs.
movd m4, [coeffpq + 4*index1q]
movd m6, [coeffpq + 4*index2q]
; SPLATW is defined elsewhere; presumably it broadcasts the low 16-bit word,
; giving m5 = c1 in every word and m6 = c2 in every word.
SPLATW m5, m4
SPLATW m6, m6
; Isolate bits 16..31 of the index1 entry in the low word of m4: this is the
; right-shift count applied to every result. Only the index1 entry supplies
; the shift; the upper half of the index2 entry is not used.
psllq m4, 32
psrlq m4, 48
; Rounding bias: (dw1 << shift) >> 1. dw1 is defined outside this macro,
; presumably a vector of dword 1s, which would make the bias 2^(shift-1).
mova m7, [dw1]
pslld m7, m4
psrld m7, 1
; Interleave the coefficients as word pairs (c1, c2) to match the
; interleaved (in1, in2) sample pairs fed to pmaddwd below.
punpcklwd m5, m6
; len *= 2, then move all pointers to the end of their buffers and negate
; len so it can be used as a negative offset that counts up towards zero.
add lenq , lenq
add in1q , lenq
add in2q , lenq
add outq , lenq
neg lenq
.next:
; First mmsize bytes: interleave in1/in2 words into low (m0) and high (m1)
; halves.
mov%1 m0, [in1q + lenq ]
mov%1 m2, [in2q + lenq ]
mova m1, m0
punpcklwd m0, m2
punpckhwd m1, m2
; Second mmsize bytes: same interleave into m2/m3. m6 is free to be used as
; scratch here because its coefficient was already merged into m5.
mov%1 m2, [in1q + lenq + mmsize]
mov%1 m6, [in2q + lenq + mmsize]
mova m3, m2
punpcklwd m2, m6
punpckhwd m3, m6
; Each dword becomes in1*c1 + in2*c2 for one element.
pmaddwd m0, m5
pmaddwd m1, m5
pmaddwd m2, m5
pmaddwd m3, m5
; Add the rounding bias.
paddd m0, m7
paddd m1, m7
paddd m2, m7
paddd m3, m7
; Arithmetic shift right by the count held in m4.
psrad m0, m4
psrad m1, m4
psrad m2, m4
psrad m3, m4
; Pack dwords back to words with signed saturation; low/high halves recombine
; in the original element order.
packssdw m0, m1
packssdw m2, m3
mov%1 [outq + lenq ], m0
mov%1 [outq + lenq + mmsize], m2
; Advance by the 2*mmsize bytes just processed; loop while the offset is
; still negative.
add lenq, mmsize*2
jl .next
%if mmsize == 8
; MMX version: clear the MMX state before returning.
emms
RET
%else
REP_RET
%endif
%endmacro
INIT_MMX mmx INIT_MMX mmx
MIX1_INT16 u MIX1_INT16 u
MIX1_INT16 a MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a
INIT_XMM sse INIT_XMM sse
MIX2_FLT u MIX2_FLT u
@ -169,6 +238,8 @@ MIX1_FLT u
MIX1_FLT a MIX1_FLT a
MIX1_INT16 u MIX1_INT16 u
MIX1_INT16 a MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a
%if HAVE_AVX %if HAVE_AVX
INIT_YMM avx INIT_YMM avx

@ -166,9 +166,11 @@ void swri_rematrix_init_x86(struct SwrContext *s){
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
if(mm_flags & AV_CPU_FLAG_MMX) { if(mm_flags & AV_CPU_FLAG_MMX) {
s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx; s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
} }
if(mm_flags & AV_CPU_FLAG_SSE) { if(mm_flags & AV_CPU_FLAG_SSE) {
s->mix_1_1_simd = ff_mix_1_1_a_int16_sse; s->mix_1_1_simd = ff_mix_1_1_a_int16_sse;
s->mix_2_1_simd = ff_mix_2_1_a_int16_sse;
} }
s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t)); s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
for(i=0; i<nb_out; i++){ for(i=0; i<nb_out; i++){

Loading…
Cancel
Save