x86: lavr: add SSE2/AVX dither_int_to_float()

pull/8/head
Justin Ruggles 12 years ago
parent 1fb8f6a44f
commit a6a3164b13
  1. 64
      libavresample/x86/dither.asm
  2. 22
      libavresample/x86/dither_init.c

@ -23,6 +23,9 @@
SECTION_RODATA 32
; 1.0f / (2.0f * INT32_MAX)
pf_dither_scale: times 8 dd 2.32830643762e-10
pf_s16_scale: times 4 dd 32753.0
SECTION_TEXT
@ -51,3 +54,64 @@ cglobal quantize, 4,4,3, dst, src, dither, len
add lenq, mmsize
jl .loop
REP_RET
;------------------------------------------------------------------------------
; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
;------------------------------------------------------------------------------
%macro DITHER_INT_TO_FLOAT_RECTANGULAR 0
cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
lea lenq, [4*lend]
add srcq, lenq
add dstq, lenq
neg lenq
mova m0, [pf_dither_scale]
.loop:
cvtdq2ps m1, [srcq+lenq]
cvtdq2ps m2, [srcq+lenq+mmsize]
mulps m1, m1, m0
mulps m2, m2, m0
mova [dstq+lenq], m1
mova [dstq+lenq+mmsize], m2
add lenq, 2*mmsize
jl .loop
REP_RET
%endmacro
INIT_XMM sse2
DITHER_INT_TO_FLOAT_RECTANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_RECTANGULAR
;------------------------------------------------------------------------------
; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len)
;------------------------------------------------------------------------------
%macro DITHER_INT_TO_FLOAT_TRIANGULAR 0
cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
lea lenq, [4*lend]
lea src1q, [src0q+2*lenq]
add src0q, lenq
add dstq, lenq
neg lenq
mova m0, [pf_dither_scale]
.loop:
cvtdq2ps m1, [src0q+lenq]
cvtdq2ps m2, [src0q+lenq+mmsize]
cvtdq2ps m3, [src1q+lenq]
cvtdq2ps m4, [src1q+lenq+mmsize]
addps m1, m1, m3
addps m2, m2, m4
mulps m1, m1, m0
mulps m2, m2, m0
mova [dstq+lenq], m1
mova [dstq+lenq+mmsize], m2
add lenq, 2*mmsize
jl .loop
REP_RET
%endmacro
INIT_XMM sse2
DITHER_INT_TO_FLOAT_TRIANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_TRIANGULAR

@ -26,6 +26,12 @@
extern void ff_quantize_sse2(int16_t *dst, const float *src, float *dither,
int len);
extern void ff_dither_int_to_float_rectangular_sse2(float *dst, int *src, int len);
extern void ff_dither_int_to_float_rectangular_avx(float *dst, int *src, int len);
extern void ff_dither_int_to_float_triangular_sse2(float *dst, int *src0, int len);
extern void ff_dither_int_to_float_triangular_avx(float *dst, int *src0, int len);
av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
enum AVResampleDitherMethod method)
{
@ -36,4 +42,20 @@ av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
ddsp->ptr_align = 16;
ddsp->samples_align = 8;
}
if (method == AV_RESAMPLE_DITHER_RECTANGULAR) {
if (EXTERNAL_SSE2(mm_flags)) {
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_sse2;
}
if (EXTERNAL_AVX(mm_flags)) {
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_avx;
}
} else {
if (EXTERNAL_SSE2(mm_flags)) {
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_sse2;
}
if (EXTERNAL_AVX(mm_flags)) {
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_avx;
}
}
}

Loading…
Cancel
Save