dcadsp: add int8x8_fmul_int32 to dsp context

It is currently declared as a macro that is set to one of several inlinable
functions, among them a NEON implementation and a default C implementation.

Add a DSP parameter to each inline function; it is unused except by the
default C implementation, which calls a function pointer from the DSP context.

On an Arrandale CPU, gain for an inlined SSE2 function vs. a call:
- Win32: 29 to 26 cycles
- Win64: 25 to 23 cycles

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
pull/43/merge
Christophe Gisquet 13 years ago committed by Janne Grunau
parent e3fec3f095
commit 2bd44cb705
  1. 3
      libavcodec/arm/dca.h
  2. 10
      libavcodec/dcadec.c
  3. 9
      libavcodec/dcadsp.c
  4. 1
      libavcodec/dcadsp.h

@ -83,7 +83,8 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y #if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
#define int8x8_fmul_int32 int8x8_fmul_int32 #define int8x8_fmul_int32 int8x8_fmul_int32
static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
float *dst, const int8_t *src, int scale)
{ {
__asm__ ("vcvt.f32.s32 %2, %2, #4 \n" __asm__ ("vcvt.f32.s32 %2, %2, #4 \n"
"vld1.8 {d0}, [%1,:64] \n" "vld1.8 {d0}, [%1,:64] \n"

@ -1086,12 +1086,10 @@ static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
#ifndef int8x8_fmul_int32 #ifndef int8x8_fmul_int32
static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst,
const int8_t *src, int scale)
{ {
float fscale = scale / 16.0; dsp->int8x8_fmul_int32(dst, src, scale);
int i;
for (i = 0; i < 8; i++)
dst[i] = src[i] * fscale;
} }
#endif #endif
@ -1219,7 +1217,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
s->debug_flag |= 0x01; s->debug_flag |= 0x01;
} }
int8x8_fmul_int32(subband_samples[k][l], int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l],
&high_freq_vq[hfvq][subsubframe * 8], &high_freq_vq[hfvq][subsubframe * 8],
s->scale_factor[k][l][0]); s->scale_factor[k][l][0]);
} }

@ -24,6 +24,14 @@
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "dcadsp.h" #include "dcadsp.h"
static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
{
float fscale = scale / 16.0;
int i;
for (i = 0; i < 8; i++)
dst[i] = src[i] * fscale;
}
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
int decifactor, float scale) int decifactor, float scale)
{ {
@ -78,5 +86,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
{ {
s->lfe_fir = dca_lfe_fir_c; s->lfe_fir = dca_lfe_fir_c;
s->qmf_32_subbands = dca_qmf_32_subbands; s->qmf_32_subbands = dca_qmf_32_subbands;
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
if (ARCH_ARM) ff_dcadsp_init_arm(s); if (ARCH_ARM) ff_dcadsp_init_arm(s);
} }

@ -31,6 +31,7 @@ typedef struct DCADSPContext {
int *synth_buf_offset, float synth_buf2[32], int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out, const float window[512], float *samples_out,
float raXin[32], float scale); float raXin[32], float scale);
void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale);
} DCADSPContext; } DCADSPContext;
void ff_dcadsp_init(DCADSPContext *s); void ff_dcadsp_init(DCADSPContext *s);

Loading…
Cancel
Save