swr: mix_1_1 int16 MMX

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/59/head
Michael Niedermayer 13 years ago
parent 52afa43691
commit cbeeaf2593
  1. 2
      libswresample/rematrix.c
  2. 63
      libswresample/x86/rematrix.asm
  3. 15
      libswresample/x86/swresample_x86.c

@ -380,7 +380,7 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
in_i= s->matrix_ch[out_i][1]; in_i= s->matrix_ch[out_i][1];
if(s->matrix[out_i][in_i]!=1.0){ if(s->matrix[out_i][in_i]!=1.0){
if(s->mix_1_1_simd && len1) if(s->mix_1_1_simd && len1)
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_matrix, in->ch_count*out_i + in_i, len1); s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_simd_matrix, in->ch_count*out_i + in_i, len1);
if(len != len1) if(len != len1)
s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1); s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1);
}else if(mustcopy){ }else if(mustcopy){

@ -21,6 +21,12 @@
%include "libavutil/x86/x86inc.asm" %include "libavutil/x86/x86inc.asm"
%include "libavutil/x86/x86util.asm" %include "libavutil/x86/x86util.asm"
; Read-only constants, aligned to 32 bytes.
SECTION_RODATA
align 32
; dw1: eight dwords, each holding the value 1.
dw1: times 8 dd 1
; w1: sixteen words, each holding the value 1. MIX1_INT16 interleaves it with
; the input samples and shifts it to build its per-word rounding constant.
w1 : times 16 dw 1
SECTION .text SECTION .text
%macro MIX2_FLT 1 %macro MIX2_FLT 1
@ -99,6 +105,63 @@ mix_1_1_float_u_int %+ SUFFIX
REP_RET REP_RET
%endmacro %endmacro
; MIX1_INT16 <u|a>
;
; Emits mix_1_1_<%1>_int16(out, in, coeffp, index, len): scales a run of int16
; samples from `in` by one fixed-point coefficient and stores the rounded,
; saturated int16 results to `out`.
;
; %1 selects the load/store form through mov%1 (movu or mova). The "a" variant
; checks both pointers against mmsize-1 and jumps to the label
; mix_1_1_int16_u_int<SUFFIX>, which the non-"a" variant defines, when either
; pointer is misaligned.
;
; The 32-bit entry at coeffp[index] is consumed as two 16-bit fields: the low
; word is the multiplier and bits 16..31 are the right-shift count. This
; matches the (coefficient, 15 - sh) pairs written by the init code in
; swresample_x86.c.
;
; Each loop iteration handles 2*mmsize bytes and the body runs before the
; counter is tested; there is no tail handling here.
; NOTE(review): presumably the caller only passes a non-zero len that is a
; multiple of mmsize samples and mixes the remainder elsewhere - confirm.
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
; Aligned entry point: fall back to the unaligned code if in or out is not
; a multiple of mmsize.
test inq, mmsize-1
jne mix_1_1_int16_u_int %+ SUFFIX
test outq, mmsize-1
jne mix_1_1_int16_u_int %+ SUFFIX
%else
; Label targeted by the alignment fallback of the "a" variant.
mix_1_1_int16_u_int %+ SUFFIX
%endif
; m4 = 32-bit matrix entry (multiplier in bits 0..15, shift in bits 16..31).
movd m4, [coeffpq + 4*indexq]
; SPLATW comes from x86util.asm; presumably it broadcasts the low word of m4
; (the multiplier) into every word of m5 - confirm against the macro.
SPLATW m5, m4
; Keep only bits 16..31 of the entry: m4 = shift count.
psllq m4, 32
psrlq m4, 48
; m0 = (1 << shift) >> 1 in every word: the rounding term added before the
; final arithmetic shift.
mova m0, [w1]
psllw m0, m4
psrlw m0, 1
; m5 = interleaved (multiplier, rounding) word pairs for pmaddwd.
punpcklwd m5, m0
; Convert len from int16 samples to bytes, advance both pointers to the end
; of the run, and walk a negative offset up towards zero.
add lenq , lenq
add inq , lenq
add outq , lenq
neg lenq
.next:
; Load two registers' worth of samples.
mov%1 m0, [inq + lenq ]
mov%1 m2, [inq + lenq + mmsize]
mova m1, m0
mova m3, m2
; Interleave every sample with the constant 1 -> (sample, 1) word pairs.
punpcklwd m0, [w1]
punpckhwd m1, [w1]
punpcklwd m2, [w1]
punpckhwd m3, [w1]
; Per dword: sample * multiplier + 1 * rounding.
pmaddwd m0, m5
pmaddwd m1, m5
pmaddwd m2, m5
pmaddwd m3, m5
; Scale back down by the shift count (arithmetic shift keeps the sign).
psrad m0, m4
psrad m1, m4
psrad m2, m4
psrad m3, m4
; Narrow the dwords back to int16 with signed saturation; the low/high halves
; are re-joined in their original order.
packssdw m0, m1
packssdw m2, m3
mov%1 [outq + lenq ], m0
mov%1 [outq + lenq + mmsize], m2
; Advance by the 2*mmsize bytes just processed; loop while the offset is
; still negative.
add lenq, mmsize*2
jl .next
%if mmsize == 8
; MMX build (8-byte registers): issue emms before returning.
emms
RET
%else
REP_RET
%endif
%endmacro
; Instantiate MIX1_INT16 for MMX: "u" uses movu loads/stores and defines the
; mix_1_1_int16_u_int<SUFFIX> label; "a" uses mova and jumps to that label
; when a pointer is not mmsize-aligned.
INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
INIT_XMM sse INIT_XMM sse
MIX2_FLT u MIX2_FLT u
MIX2_FLT a MIX2_FLT a

@ -163,6 +163,21 @@ void swri_rematrix_init_x86(struct SwrContext *s){
s->mix_2_1_simd = NULL; s->mix_2_1_simd = NULL;
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
if(mm_flags & AV_CPU_FLAG_MMX) {
s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
}
s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
for(i=0; i<nb_out; i++){
int sh = 0;
for(j=0; j<nb_in; j++)
sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
sh = FFMAX(av_log2(sh) - 14, 0);
for(j=0; j<nb_in; j++) {
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
}
}
} else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
if(mm_flags & AV_CPU_FLAG_SSE) { if(mm_flags & AV_CPU_FLAG_SSE) {
s->mix_1_1_simd = ff_mix_1_1_a_float_sse; s->mix_1_1_simd = ff_mix_1_1_a_float_sse;

Loading…
Cancel
Save