swr: initialize only the necessary resample dsp functions

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/76/merge
James Almer 11 years ago committed by Michael Niedermayer
parent 5c65aed7fd
commit 857cd1f33b
  1. 10
      libswresample/resample.c
  2. 12
      libswresample/resample.h
  3. 33
      libswresample/resample_dsp.c
  4. 12
      libswresample/resample_template.c
  5. 78
      libswresample/x86/resample_x86_dsp.c

@@ -271,8 +271,6 @@ static int swri_resample(ResampleContext *c,
uint8_t *dst, const uint8_t *src, int *consumed, uint8_t *dst, const uint8_t *src, int *consumed,
int src_size, int dst_size, int update_ctx) int src_size, int dst_size, int update_ctx)
{ {
int fn_idx = c->format - AV_SAMPLE_FMT_S16P;
if (c->filter_length == 1 && c->phase_shift == 0) { if (c->filter_length == 1 && c->phase_shift == 0) {
int index= c->index; int index= c->index;
int frac= c->frac; int frac= c->frac;
@@ -281,7 +279,7 @@ static int swri_resample(ResampleContext *c,
int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr; int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr;
dst_size= FFMIN(dst_size, new_size); dst_size= FFMIN(dst_size, new_size);
c->dsp.resample_one[fn_idx](dst, src, dst_size, index2, incr); c->dsp.resample_one(dst, src, dst_size, index2, incr);
index += dst_size * c->dst_incr_div; index += dst_size * c->dst_incr_div;
index += (frac + dst_size * (int64_t)c->dst_incr_mod) / c->src_incr; index += (frac + dst_size * (int64_t)c->dst_incr_mod) / c->src_incr;
@@ -298,11 +296,7 @@ static int swri_resample(ResampleContext *c,
dst_size = FFMIN(dst_size, delta_n); dst_size = FFMIN(dst_size, delta_n);
if (dst_size > 0) { if (dst_size > 0) {
if (!c->linear) { *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx);
*consumed = c->dsp.resample_common[fn_idx](c, dst, src, dst_size, update_ctx);
} else {
*consumed = c->dsp.resample_linear[fn_idx](c, dst, src, dst_size, update_ctx);
}
} else { } else {
*consumed = 0; *consumed = 0;
} }

@@ -27,11 +27,6 @@
#include "swresample_internal.h" #include "swresample_internal.h"
typedef void (*resample_one_fn)(uint8_t *dst, const uint8_t *src,
int n, int64_t index, int64_t incr);
typedef int (*resample_fn)(struct ResampleContext *c, uint8_t *dst,
const uint8_t *src, int n, int update_ctx);
typedef struct ResampleContext { typedef struct ResampleContext {
const AVClass *av_class; const AVClass *av_class;
uint8_t *filter_bank; uint8_t *filter_bank;
@@ -56,9 +51,10 @@ typedef struct ResampleContext {
int filter_shift; int filter_shift;
struct { struct {
resample_one_fn resample_one[AV_SAMPLE_FMT_NB - AV_SAMPLE_FMT_S16P]; void (*resample_one)(void *dst, const void *src,
resample_fn resample_common[AV_SAMPLE_FMT_NB - AV_SAMPLE_FMT_S16P]; int n, int64_t index, int64_t incr);
resample_fn resample_linear[AV_SAMPLE_FMT_NB - AV_SAMPLE_FMT_S16P]; int (*resample)(struct ResampleContext *c, void *dst,
const void *src, int n, int update_ctx);
} dsp; } dsp;
} ResampleContext; } ResampleContext;

@@ -45,21 +45,24 @@
void swri_resample_dsp_init(ResampleContext *c) void swri_resample_dsp_init(ResampleContext *c)
{ {
#define FNIDX(fmt) (AV_SAMPLE_FMT_##fmt - AV_SAMPLE_FMT_S16P) switch(c->format){
c->dsp.resample_one[FNIDX(S16P)] = (resample_one_fn) resample_one_int16; case AV_SAMPLE_FMT_S16P:
c->dsp.resample_one[FNIDX(S32P)] = (resample_one_fn) resample_one_int32; c->dsp.resample_one = resample_one_int16;
c->dsp.resample_one[FNIDX(FLTP)] = (resample_one_fn) resample_one_float; c->dsp.resample = c->linear ? resample_linear_int16 : resample_common_int16;
c->dsp.resample_one[FNIDX(DBLP)] = (resample_one_fn) resample_one_double; break;
case AV_SAMPLE_FMT_S32P:
c->dsp.resample_common[FNIDX(S16P)] = (resample_fn) resample_common_int16; c->dsp.resample_one = resample_one_int32;
c->dsp.resample_common[FNIDX(S32P)] = (resample_fn) resample_common_int32; c->dsp.resample = c->linear ? resample_linear_int32 : resample_common_int32;
c->dsp.resample_common[FNIDX(FLTP)] = (resample_fn) resample_common_float; break;
c->dsp.resample_common[FNIDX(DBLP)] = (resample_fn) resample_common_double; case AV_SAMPLE_FMT_FLTP:
c->dsp.resample_one = resample_one_float;
c->dsp.resample_linear[FNIDX(S16P)] = (resample_fn) resample_linear_int16; c->dsp.resample = c->linear ? resample_linear_float : resample_common_float;
c->dsp.resample_linear[FNIDX(S32P)] = (resample_fn) resample_linear_int32; break;
c->dsp.resample_linear[FNIDX(FLTP)] = (resample_fn) resample_linear_float; case AV_SAMPLE_FMT_DBLP:
c->dsp.resample_linear[FNIDX(DBLP)] = (resample_fn) resample_linear_double; c->dsp.resample_one = resample_one_double;
c->dsp.resample = c->linear ? resample_linear_double : resample_common_double;
break;
}
if (ARCH_X86) swri_resample_dsp_x86_init(c); if (ARCH_X86) swri_resample_dsp_x86_init(c);
} }

@@ -70,9 +70,11 @@
#endif #endif
static void RENAME(resample_one)(DELEM *dst, const DELEM *src, static void RENAME(resample_one)(void *dest, const void *source,
int dst_size, int64_t index2, int64_t incr) int dst_size, int64_t index2, int64_t incr)
{ {
DELEM *dst = dest;
const DELEM *src = source;
int dst_index; int dst_index;
for (dst_index = 0; dst_index < dst_size; dst_index++) { for (dst_index = 0; dst_index < dst_size; dst_index++) {
@@ -82,9 +84,11 @@ static void RENAME(resample_one)(DELEM *dst, const DELEM *src,
} }
static int RENAME(resample_common)(ResampleContext *c, static int RENAME(resample_common)(ResampleContext *c,
DELEM *dst, const DELEM *src, void *dest, const void *source,
int n, int update_ctx) int n, int update_ctx)
{ {
DELEM *dst = dest;
const DELEM *src = source;
int dst_index; int dst_index;
int index= c->index; int index= c->index;
int frac= c->frac; int frac= c->frac;
@@ -120,9 +124,11 @@ static int RENAME(resample_common)(ResampleContext *c,
} }
static int RENAME(resample_linear)(ResampleContext *c, static int RENAME(resample_linear)(ResampleContext *c,
DELEM *dst, const DELEM *src, void *dest, const void *source,
int n, int update_ctx) int n, int update_ctx)
{ {
DELEM *dst = dest;
const DELEM *src = source;
int dst_index; int dst_index;
int index= c->index; int index= c->index;
int frac= c->frac; int frac= c->frac;

@@ -28,10 +28,10 @@
#include "libswresample/resample.h" #include "libswresample/resample.h"
#define RESAMPLE_FUNCS(type, opt) \ #define RESAMPLE_FUNCS(type, opt) \
int ff_resample_common_##type##_##opt(ResampleContext *c, uint8_t *dst, \ int ff_resample_common_##type##_##opt(ResampleContext *c, void *dst, \
const uint8_t *src, int sz, int upd); \ const void *src, int sz, int upd); \
int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \ int ff_resample_linear_##type##_##opt(ResampleContext *c, void *dst, \
const uint8_t *src, int sz, int upd) const void *src, int sz, int upd)
RESAMPLE_FUNCS(int16, mmxext); RESAMPLE_FUNCS(int16, mmxext);
RESAMPLE_FUNCS(int16, sse2); RESAMPLE_FUNCS(int16, sse2);
@@ -46,36 +46,44 @@ void swri_resample_dsp_x86_init(ResampleContext *c)
{ {
int av_unused mm_flags = av_get_cpu_flags(); int av_unused mm_flags = av_get_cpu_flags();
#define FNIDX(fmt) (AV_SAMPLE_FMT_##fmt - AV_SAMPLE_FMT_S16P) switch(c->format){
if (ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL && mm_flags & AV_CPU_FLAG_MMX2) { case AV_SAMPLE_FMT_S16P:
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_mmxext; if (ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL && mm_flags & AV_CPU_FLAG_MMX2) {
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_mmxext; c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
} : ff_resample_common_int16_mmxext;
if (HAVE_SSE_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE) { }
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_sse; if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) {
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_sse; c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
} : ff_resample_common_int16_sse2;
if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) { }
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_sse2; if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) {
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_sse2; c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
: ff_resample_common_int16_xop;
c->dsp.resample_common[FNIDX(DBLP)] = ff_resample_common_double_sse2; }
c->dsp.resample_linear[FNIDX(DBLP)] = ff_resample_linear_double_sse2; break;
} case AV_SAMPLE_FMT_FLTP:
if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) { if (HAVE_SSE_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE) {
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_avx; c->dsp.resample = c->linear ? ff_resample_linear_float_sse
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_avx; : ff_resample_common_float_sse;
} }
if (HAVE_FMA3_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA3) { if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma3; c->dsp.resample = c->linear ? ff_resample_linear_float_avx
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma3; : ff_resample_common_float_avx;
} }
if (HAVE_FMA4_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA4) { if (HAVE_FMA3_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA3) {
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4; c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4; : ff_resample_common_float_fma3;
} }
if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) { if (HAVE_FMA4_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA4) {
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_xop; c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_xop; : ff_resample_common_float_fma4;
}
break;
case AV_SAMPLE_FMT_DBLP:
if (HAVE_SSE2_EXTERNAL && mm_flags & AV_CPU_FLAG_SSE2) {
c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
: ff_resample_common_double_sse2;
}
break;
} }
} }

Loading…
Cancel
Save