From 0c142e4cda3dbc3b460a5f8c1095cee6594ed5c6 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 19 Jun 2012 03:06:40 +0200 Subject: [PATCH] swr: introduce filter_alloc in preparation of SIMD resample optimisations Signed-off-by: Michael Niedermayer --- libswresample/resample.c | 20 +++++++++++--------- libswresample/resample_template.c | 6 +++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/libswresample/resample.c b/libswresample/resample.c index 558401c459..81da0ff14a 100644 --- a/libswresample/resample.c +++ b/libswresample/resample.c @@ -37,6 +37,7 @@ typedef struct ResampleContext { const AVClass *av_class; uint8_t *filter_bank; int filter_length; + int filter_alloc; int ideal_dst_incr; int dst_incr; int index; @@ -89,7 +90,7 @@ static double bessel(double x){ * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16 * @return 0 on success, negative on error */ -static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int phase_count, int scale, int type){ +static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale, int type){ int ph, i; double x, y, w; double *tab = av_malloc(tap_count * sizeof(*tab)); @@ -133,19 +134,19 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap switch(c->format){ case AV_SAMPLE_FMT_S16P: for(i=0;i<tap_count;i++) [...] c->linear = linear; c->factor = factor; c->filter_length = FFMAX((int)ceil(filter_size/factor), 1); - c->filter_bank = av_mallocz(c->filter_length*(phase_count+1)*c->felem_size); + c->filter_alloc = FFALIGN(c->filter_length, 8); + c->filter_bank = av_mallocz(c->filter_alloc*(phase_count+1)*c->felem_size); if (!c->filter_bank) goto error; - if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, phase_count, 1<<c->filter_shift, WINDOW_TYPE)) + if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, c->filter_alloc, 
phase_count, 1<<c->filter_shift, WINDOW_TYPE)) goto error; - memcpy(c->filter_bank + (c->filter_length*phase_count+1)*c->felem_size, c->filter_bank, (c->filter_length-1)*c->felem_size); - memcpy(c->filter_bank + (c->filter_length*phase_count )*c->felem_size, c->filter_bank + (c->filter_length - 1)*c->felem_size, c->felem_size); + memcpy(c->filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, c->filter_bank, (c->filter_alloc-1)*c->felem_size); + memcpy(c->filter_bank + (c->filter_alloc*phase_count )*c->felem_size, c->filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size); } c->compensation_distance= 0; diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c index 4060c66a43..13d7189842 100644 --- a/libswresample/resample_template.c +++ b/libswresample/resample_template.c @@ -50,7 +50,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int frac = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr; }else if(compensation_distance == 0 && !c->linear && index >= 0){ for(dst_index=0; dst_index < dst_size; dst_index++){ - FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_length*(index & c->phase_mask); + FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_alloc*(index & c->phase_mask); int sample_index= index >> c->phase_shift; if(sample_index + c->filter_length > src_size){ @@ -72,7 +72,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int } }else{ for(dst_index=0; dst_index < dst_size; dst_index++){ - FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_length*(index & c->phase_mask); + FELEM *filter= ((FELEM*)c->filter_bank) + c->filter_alloc*(index & c->phase_mask); int sample_index= index >> c->phase_shift; FELEM2 val=0; @@ -85,7 +85,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int FELEM2 v2=0; for(i=0; i<c->filter_length; i++){ val += src[sample_index + i] * (FELEM2)filter[i]; - v2 += src[sample_index + i] * (FELEM2)filter[i + 
c->filter_length]; + v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc]; } val+=(v2-val)*(FELEML)frac / c->src_incr; }else{