diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 398326c8c8..65db20d2b3 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len); -void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, - int len); void ff_butterflies_float_neon(float *v1, float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); void ff_vector_fmul_reverse_neon(float *dst, const float *src0, @@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->vector_fmul_window = ff_vector_fmul_window_neon; c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; - c->vector_fmac_scalar = ff_vector_fmac_scalar_neon; c->butterflies_float = ff_butterflies_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon; c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 9a5a40d6ac..358ed61299 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -682,54 +682,6 @@ NOVFP vdup.32 q8, r2 .unreq len endfunc -function ff_vector_fmac_scalar_neon, export=1 -VFP len .req r2 -VFP acc .req r3 -NOVFP len .req r3 -NOVFP acc .req r2 -VFP vdup.32 q15, d0[0] -NOVFP vdup.32 q15, r2 - bics r12, len, #15 - mov acc, r0 - beq 3f - vld1.32 {q0}, [r1,:128]! - vld1.32 {q8}, [acc,:128]! - vld1.32 {q1}, [r1,:128]! - vld1.32 {q9}, [acc,:128]! -1: vmla.f32 q8, q0, q15 - vld1.32 {q2}, [r1,:128]! - vld1.32 {q10}, [acc,:128]! - vmla.f32 q9, q1, q15 - vld1.32 {q3}, [r1,:128]! - vld1.32 {q11}, [acc,:128]! - vmla.f32 q10, q2, q15 - vst1.32 {q8}, [r0,:128]! - vmla.f32 q11, q3, q15 - vst1.32 {q9}, [r0,:128]! - subs r12, r12, #16 - beq 2f - vld1.32 {q0}, [r1,:128]! - vld1.32 {q8}, [acc,:128]! - vst1.32 {q10}, [r0,:128]! - vld1.32 {q1}, [r1,:128]! - vld1.32 {q9}, [acc,:128]! - vst1.32 {q11}, [r0,:128]! - b 1b -2: vst1.32 {q10}, [r0,:128]! - vst1.32 {q11}, [r0,:128]! - ands len, len, #15 - it eq - bxeq lr -3: vld1.32 {q0}, [r1,:128]! - vld1.32 {q8}, [acc,:128]! - vmla.f32 q8, q0, q15 - vst1.32 {q8}, [r0,:128]! - subs len, len, #4 - bgt 3b - bx lr - .unreq len -endfunc - function ff_butterflies_float_neon, export=1 1: vld1.32 {q0},[r0,:128] vld1.32 {q1},[r1,:128] diff --git a/libavcodec/dca.c b/libavcodec/dca.c index 103f0588e3..b37dc49d3f 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -27,6 +27,7 @@ #include #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/intmath.h" #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" @@ -383,7 +384,7 @@ typedef struct { int profile; int debug_flag; ///< used for suppressing repeated error messages output - DSPContext dsp; + AVFloatDSPContext fdsp; FFTContext imdct; SynthFilterContext synth; DCADSPContext dcadsp; @@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256; float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256; float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256; - s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); - s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); + s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); + s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); } if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { @@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx) s->avctx = avctx; dca_init_vlcs(); - ff_dsputil_init(&s->dsp, avctx); + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_mdct_init(&s->imdct, 6, 1, 1.0); ff_synth_filter_init(&s->synth); ff_dcadsp_init(&s->dcadsp); diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 942f606ea8..15f184e406 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul, dst[i] = src[i] * mul; } -static void vector_fmac_scalar_c(float *dst, const float *src, float mul, - int len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] += src[i] * mul; -} - static void butterflies_float_c(float *restrict v1, float *restrict v2, int len) { @@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->butterflies_float = butterflies_float_c; c->butterflies_float_interleave = butterflies_float_interleave_c; c->vector_fmul_scalar = vector_fmul_scalar_c; - c->vector_fmac_scalar = vector_fmac_scalar_c; c->shrink[0]= av_image_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index ec3d7ee007..e54ae69831 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -416,17 +416,6 @@ typedef struct DSPContext { */ void (*vector_fmul_scalar)(float *dst, const float *src, float mul, int len); - /** - * Multiply a vector of floats by a scalar float and add to - * destination vector. Source and destination vectors must - * overlap exactly or not at all. - * @param dst result vector, 16-byte aligned - * @param src input vector, 16-byte aligned - * @param mul scalar value - * @param len length of vector, multiple of 4 - */ - void (*vector_fmac_scalar)(float *dst, const float *src, float mul, - int len); /** * Calculate the scalar product of two vectors of floats. * @param v1 first vector, 16-byte aligned diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index fa6d0d7d15..3ca0288b31 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -26,7 +26,11 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); +void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, + int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; + fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index d66fa09424..03b164388f 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1 3: vst1.32 {d16-d19},[r0,:128]! bx lr endfunc + +function ff_vector_fmac_scalar_neon, export=1 +VFP len .req r2 +VFP acc .req r3 +NOVFP len .req r3 +NOVFP acc .req r2 +VFP vdup.32 q15, d0[0] +NOVFP vdup.32 q15, r2 + bics r12, len, #15 + mov acc, r0 + beq 3f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! +1: vmla.f32 q8, q0, q15 + vld1.32 {q2}, [r1,:128]! + vld1.32 {q10}, [acc,:128]! + vmla.f32 q9, q1, q15 + vld1.32 {q3}, [r1,:128]! + vld1.32 {q11}, [acc,:128]! + vmla.f32 q10, q2, q15 + vst1.32 {q8}, [r0,:128]! + vmla.f32 q11, q3, q15 + vst1.32 {q9}, [r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vst1.32 {q10}, [r0,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! + vst1.32 {q11}, [r0,:128]! + b 1b +2: vst1.32 {q10}, [r0,:128]! + vst1.32 {q11}, [r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vmla.f32 q8, q0, q15 + vst1.32 {q8}, [r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 039dd07d36..2e90939090 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1, dst[i] = src0[i] * src1[i]; } +static void vector_fmac_scalar_c(float *dst, const float *src, float mul, + int len) +{ + int i; + for (i = 0; i < len; i++) + dst[i] += src[i] * mul; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; + fdsp->vector_fmac_scalar = vector_fmac_scalar_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 30161a252b..4e266304da 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext { */ void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len); + + /** + * Multiply a vector of floats by a scalar float and add to + * destination vector. Source and destination vectors must + * overlap exactly or not at all. + * + * @param dst result vector + * constraints: 16-byte aligned + * @param src input vector + * constraints: 16-byte aligned + * @param mul scalar value + * @param len length of vector + * constraints: multiple of 4 + */ + void (*vector_fmac_scalar)(float *dst, const float *src, float mul, + int len); } AVFloatDSPContext; /**