From d56668bd80075615b89aff652fe8a576bf853ceb Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 20 Jan 2013 15:41:52 -0800 Subject: [PATCH 1/2] floatdsp: move scalarproduct_float from dsputil to avfloatdsp. This makes the aac decoder and all voice codecs independent of dsputil. --- libavcodec/aac.h | 1 - libavcodec/aacdec.c | 3 +-- libavcodec/acelp_pitch_delay.c | 4 ++-- libavcodec/acelp_vectors.c | 6 +++--- libavcodec/amrnbdec.c | 20 ++++++++--------- libavcodec/amrwbdec.c | 33 +++++++++++++++-------------- libavcodec/arm/dsputil_init_neon.c | 3 --- libavcodec/arm/dsputil_neon.S | 13 ------------ libavcodec/dsputil.c | 12 ----------- libavcodec/dsputil.h | 18 ---------------- libavcodec/qcelpdec.c | 17 +++++++-------- libavcodec/ra288.c | 4 ++-- libavcodec/sipr.c | 15 +++++++------ libavcodec/sipr16k.c | 8 +++---- libavcodec/wmavoice.c | 16 ++++++++------ libavcodec/x86/dsputil.asm | 26 ----------------------- libavcodec/x86/dsputil_mmx.c | 6 ------ libavutil/arm/float_dsp_init_neon.c | 3 +++ libavutil/arm/float_dsp_neon.S | 13 ++++++++++++ libavutil/float_dsp.c | 12 +++++++++++ libavutil/float_dsp.h | 22 +++++++++++++++++++ libavutil/x86/float_dsp.asm | 25 ++++++++++++++++++++++ libavutil/x86/float_dsp_init.c | 3 +++ 23 files changed, 142 insertions(+), 141 deletions(-) diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 6c5d962dd8..dd337a0a75 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -291,7 +291,6 @@ typedef struct AACContext { FFTContext mdct; FFTContext mdct_small; FFTContext mdct_ltp; - DSPContext dsp; FmtConvertContext fmt_conv; AVFloatDSPContext fdsp; int random_state; diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index b016611fcf..5afc9b820e 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -895,7 +895,6 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ff_aac_sbr_init(); - ff_dsputil_init(&ac->dsp, avctx); ff_fmt_convert_init(&ac->fmt_conv, avctx); avpriv_float_dsp_init(&ac->fdsp, avctx->flags & 
CODEC_FLAG_BITEXACT); @@ -1358,7 +1357,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], cfo[k] = ac->random_state; } - band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); + band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len); scale = sf[idx] / sqrtf(band_energy); ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); } diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c index a9668fac70..ab09bdb6c5 100644 --- a/libavcodec/acelp_pitch_delay.c +++ b/libavcodec/acelp_pitch_delay.c @@ -21,9 +21,9 @@ */ #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/mathematics.h" #include "avcodec.h" -#include "dsputil.h" #include "acelp_pitch_delay.h" #include "celp_math.h" @@ -120,7 +120,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy, // Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)). float val = fixed_gain_factor * exp2f(M_LOG2_10 * 0.05 * - (ff_scalarproduct_float_c(pred_table, prediction_error, 4) + + (avpriv_scalarproduct_float_c(pred_table, prediction_error, 4) + energy_mean)) / sqrtf(fixed_mean_energy); diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c index b50c5f3ffe..a85e45f4c7 100644 --- a/libavcodec/acelp_vectors.c +++ b/libavcodec/acelp_vectors.c @@ -23,8 +23,8 @@ #include #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "avcodec.h" -#include "dsputil.h" #include "acelp_vectors.h" const uint8_t ff_fc_2pulses_9bits_track1[16] = @@ -183,7 +183,7 @@ void ff_adaptive_gain_control(float *out, const float *in, float speech_energ, int size, float alpha, float *gain_mem) { int i; - float postfilter_energ = ff_scalarproduct_float_c(in, in, size); + float postfilter_energ = avpriv_scalarproduct_float_c(in, in, size); float gain_scale_factor = 1.0; float mem = *gain_mem; @@ -204,7 +204,7 @@ void ff_scale_vector_to_given_sum_of_squares(float *out, const float *in, float sum_of_squares, 
const int n) { int i; - float scalefactor = ff_scalarproduct_float_c(in, in, n); + float scalefactor = avpriv_scalarproduct_float_c(in, in, n); if (scalefactor) scalefactor = sqrt(sum_of_squares / scalefactor); for (i = 0; i < n; i++) diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index 5c359a8f3d..7db12dd001 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -44,8 +44,8 @@ #include #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "avcodec.h" -#include "dsputil.h" #include "libavutil/common.h" #include "celp_filters.h" #include "acelp_filters.h" @@ -794,8 +794,8 @@ static int synthesis(AMRContext *p, float *lpc, // emphasize pitch vector contribution if (p->pitch_gain[4] > 0.5 && !overflow) { - float energy = ff_scalarproduct_float_c(excitation, excitation, - AMR_SUBFRAME_SIZE); + float energy = avpriv_scalarproduct_float_c(excitation, excitation, + AMR_SUBFRAME_SIZE); float pitch_factor = p->pitch_gain[4] * (p->cur_frame_mode == MODE_12k2 ? @@ -871,8 +871,8 @@ static float tilt_factor(float *lpc_n, float *lpc_d) ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, LP_FILTER_ORDER); - rh0 = ff_scalarproduct_float_c(hf, hf, AMR_TILT_RESPONSE); - rh1 = ff_scalarproduct_float_c(hf, hf + 1, AMR_TILT_RESPONSE - 1); + rh0 = avpriv_scalarproduct_float_c(hf, hf, AMR_TILT_RESPONSE); + rh1 = avpriv_scalarproduct_float_c(hf, hf + 1, AMR_TILT_RESPONSE - 1); // The spec only specifies this check for 12.2 and 10.2 kbit/s // modes. But in the ref source the tilt is always non-negative. 
@@ -892,8 +892,8 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) int i; float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input - float speech_gain = ff_scalarproduct_float_c(samples, samples, - AMR_SUBFRAME_SIZE); + float speech_gain = avpriv_scalarproduct_float_c(samples, samples, + AMR_SUBFRAME_SIZE); float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter const float *gamma_n, *gamma_d; // Formant filter factor table @@ -998,9 +998,9 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, p->fixed_gain[4] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_scalarproduct_float_c(p->fixed_vector, - p->fixed_vector, - AMR_SUBFRAME_SIZE) / + avpriv_scalarproduct_float_c(p->fixed_vector, + p->fixed_vector, + AMR_SUBFRAME_SIZE) / AMR_SUBFRAME_SIZE, p->prediction_error, energy_mean[p->cur_frame_mode], energy_pred_fac); diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 01d95f68df..553ec3dfa2 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -26,10 +26,10 @@ #include "libavutil/channel_layout.h" #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/lfg.h" #include "avcodec.h" -#include "dsputil.h" #include "lsp.h" #include "celp_filters.h" #include "acelp_filters.h" @@ -595,11 +595,11 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) static float voice_factor(float *p_vector, float p_gain, float *f_vector, float f_gain) { - double p_ener = (double) ff_scalarproduct_float_c(p_vector, p_vector, - AMRWB_SFR_SIZE) * + double p_ener = (double) avpriv_scalarproduct_float_c(p_vector, p_vector, + AMRWB_SFR_SIZE) * p_gain * p_gain; - double f_ener = (double) ff_scalarproduct_float_c(f_vector, f_vector, - AMRWB_SFR_SIZE) * + double f_ener = (double) avpriv_scalarproduct_float_c(f_vector, f_vector, + AMRWB_SFR_SIZE) * f_gain * f_gain; return (p_ener - f_ener) / (p_ener + f_ener); @@ -768,8 +768,8 @@ static void synthesis(AMRWBContext 
*ctx, float *lpc, float *excitation, /* emphasize pitch vector contribution in low bitrate modes */ if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { int i; - float energy = ff_scalarproduct_float_c(excitation, excitation, - AMRWB_SFR_SIZE); + float energy = avpriv_scalarproduct_float_c(excitation, excitation, + AMRWB_SFR_SIZE); // XXX: Weird part in both ref code and spec. A unknown parameter // {beta} seems to be identical to the current pitch gain @@ -828,9 +828,9 @@ static void upsample_5_4(float *out, const float *in, int o_size) i++; for (k = 1; k < 5; k++) { - out[i] = ff_scalarproduct_float_c(in0 + int_part, - upsample_fir[4 - frac_part], - UPS_MEM_SIZE); + out[i] = avpriv_scalarproduct_float_c(in0 + int_part, + upsample_fir[4 - frac_part], + UPS_MEM_SIZE); int_part++; frac_part--; i++; @@ -856,8 +856,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth, if (ctx->fr_cur_mode == MODE_23k85) return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); - tilt = ff_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) / - ff_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE); + tilt = avpriv_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) / + avpriv_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE); /* return gain bounded by [0.1, 1.0] */ return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); @@ -876,7 +876,8 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, const float *synth_exc, float hb_gain) { int i; - float energy = ff_scalarproduct_float_c(synth_exc, synth_exc, AMRWB_SFR_SIZE); + float energy = avpriv_scalarproduct_float_c(synth_exc, synth_exc, + AMRWB_SFR_SIZE); /* Generate a white-noise excitation */ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) @@ -1168,9 +1169,9 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, ctx->fixed_gain[0] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_scalarproduct_float_c(ctx->fixed_vector, - ctx->fixed_vector, - AMRWB_SFR_SIZE) / + 
avpriv_scalarproduct_float_c(ctx->fixed_vector, + ctx->fixed_vector, + AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE, ctx->prediction_error, ENERGY_MEAN, energy_pred_fac); diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 0e42158f19..f27aee4fb1 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); -float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); - void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, int len); void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, @@ -293,7 +291,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; } - c->scalarproduct_float = ff_scalarproduct_float_neon; c->vector_clipf = ff_vector_clipf_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index a9b3a3d8b3..cf92817ba6 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -531,19 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 bx lr endfunc -function ff_scalarproduct_float_neon, export=1 - vmov.f32 q2, #0.0 -1: vld1.32 {q0},[r0,:128]! - vld1.32 {q1},[r1,:128]! 
- vmla.f32 q2, q0, q1 - subs r2, r2, #4 - bgt 1b - vadd.f32 d0, d4, d5 - vpadd.f32 d0, d0, d0 -NOVFP vmov.32 r0, d0[0] - bx lr -endfunc - function ff_vector_clipf_neon, export=1 VFP vdup.32 q1, d0[1] VFP vdup.32 q0, d0[0] diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 8ce741a308..caf1b071d7 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2353,17 +2353,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) -{ - float p = 0.0; - int i; - - for (i = 0; i < len; i++) - p += v1[i] * v2[i]; - - return p; -} - static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, uint32_t maxi, uint32_t maxisign) { @@ -2694,7 +2683,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; c->apply_window_int16 = apply_window_int16_c; c->vector_clip_int32 = vector_clip_int32_c; - c->scalarproduct_float = ff_scalarproduct_float_c; c->shrink[0]= av_image_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 57afcdaaa8..9b88058345 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -342,13 +342,6 @@ typedef struct DSPContext { /* assume len is a multiple of 8, and arrays are 16-byte aligned */ void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); - /** - * Calculate the scalar product of two vectors of floats. 
- * @param v1 first vector, 16-byte aligned - * @param v2 second vector, 16-byte aligned - * @param len length of vectors, multiple of 4 - */ - float (*scalarproduct_float)(const float *v1, const float *v2, int len); /* (I)DCT */ void (*fdct)(DCTELEM *block/* align 16*/); @@ -454,17 +447,6 @@ void ff_dsputil_init(DSPContext* p, AVCodecContext *avctx); int ff_check_alignment(void); -/** - * Return the scalar product of two vectors. - * - * @param v1 first input vector - * @param v2 first input vector - * @param len number of elements - * - * @return sum of elementwise products - */ -float ff_scalarproduct_float_c(const float *v1, const float *v2, int len); - /** * permute block according to permuatation. * @param last last non zero element in scantable order diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c index b702175c19..59220d53e3 100644 --- a/libavcodec/qcelpdec.c +++ b/libavcodec/qcelpdec.c @@ -30,10 +30,10 @@ #include #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "avcodec.h" #include "internal.h" #include "get_bits.h" -#include "dsputil.h" #include "qcelpdata.h" #include "celp_filters.h" #include "acelp_filters.h" @@ -400,12 +400,10 @@ static void apply_gain_ctrl(float *v_out, const float *v_ref, const float *v_in) { int i; - for (i = 0; i < 160; i += 40) - ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, - ff_scalarproduct_float_c(v_ref + i, - v_ref + i, - 40), - 40); + for (i = 0; i < 160; i += 40) { + float res = avpriv_scalarproduct_float_c(v_ref + i, v_ref + i, 40); + ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, res, 40); + } } /** @@ -680,8 +678,9 @@ static void postfilter(QCELPContext *q, float *samples, float *lpc) ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160); ff_adaptive_gain_control(samples, pole_out + 10, - ff_scalarproduct_float_c(q->formant_mem + 10, - q->formant_mem + 10, 160), + avpriv_scalarproduct_float_c(q->formant_mem + 10, + q->formant_mem + 
10, + 160), 160, 0.9375, &q->postfilter_agc_mem); } diff --git a/libavcodec/ra288.c b/libavcodec/ra288.c index 8266673aec..319bdd4e22 100644 --- a/libavcodec/ra288.c +++ b/libavcodec/ra288.c @@ -79,7 +79,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx) static void convolve(float *tgt, const float *src, int len, int n) { for (; n >= 0; n--) - tgt[n] = ff_scalarproduct_float_c(src, src - n, len); + tgt[n] = avpriv_scalarproduct_float_c(src, src - n, len); } @@ -108,7 +108,7 @@ static void decode(RA288Context *ractx, float gain, int cb_coef) for (i=0; i < 5; i++) buffer[i] = codetable[cb_coef][i] * sumsum; - sum = ff_scalarproduct_float_c(buffer, buffer, 5) * ((1 << 24) / 5.); + sum = avpriv_scalarproduct_float_c(buffer, buffer, 5) * ((1 << 24) / 5.); sum = FFMAX(sum, 1); diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c index d482b0f068..3f3c13c6e1 100644 --- a/libavcodec/sipr.c +++ b/libavcodec/sipr.c @@ -26,11 +26,11 @@ #include #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "libavutil/mathematics.h" #include "avcodec.h" #define BITSTREAM_READER_LE #include "get_bits.h" -#include "dsputil.h" #include "internal.h" #include "lsp.h" @@ -411,9 +411,10 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params, convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response, SUBFR_SIZE); - avg_energy = - (0.01 + ff_scalarproduct_float_c(fixed_vector, fixed_vector, SUBFR_SIZE)) / - SUBFR_SIZE; + avg_energy = (0.01 + avpriv_scalarproduct_float_c(fixed_vector, + fixed_vector, + SUBFR_SIZE)) / + SUBFR_SIZE; ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0]; @@ -454,9 +455,9 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params, if (ctx->mode == MODE_5k0) { for (i = 0; i < subframe_count; i++) { - float energy = ff_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, - ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, - SUBFR_SIZE); + float 
energy = avpriv_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, + ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, + SUBFR_SIZE); ff_adaptive_gain_control(&synth[i * SUBFR_SIZE], &synth[i * SUBFR_SIZE], energy, SUBFR_SIZE, 0.9, &ctx->postfilter_agc); diff --git a/libavcodec/sipr16k.c b/libavcodec/sipr16k.c index bff739e44f..a472dfd59a 100644 --- a/libavcodec/sipr16k.c +++ b/libavcodec/sipr16k.c @@ -25,8 +25,8 @@ #include "sipr.h" #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/mathematics.h" -#include "dsputil.h" #include "lsp.h" #include "celp_filters.h" #include "acelp_vectors.h" @@ -163,11 +163,11 @@ static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v, const float *ma_prediction_coeff, int subframe_size, int ma_pred_order) { - mr_energy += - ff_scalarproduct_float_c(quant_energy, ma_prediction_coeff, ma_pred_order); + mr_energy += avpriv_scalarproduct_float_c(quant_energy, ma_prediction_coeff, + ma_pred_order); mr_energy = gain_corr_factor * exp(M_LN10 / 20. 
* mr_energy) / - sqrt((0.01 + ff_scalarproduct_float_c(fc_v, fc_v, subframe_size))); + sqrt((0.01 + avpriv_scalarproduct_float_c(fc_v, fc_v, subframe_size))); return mr_energy; } diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c index 08d0600200..ba778cda31 100644 --- a/libavcodec/wmavoice.c +++ b/libavcodec/wmavoice.c @@ -30,8 +30,8 @@ #include #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "libavutil/mem.h" -#include "dsputil.h" #include "avcodec.h" #include "internal.h" #include "get_bits.h" @@ -523,7 +523,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch, /* find best fitting point in history */ do { - dot = ff_scalarproduct_float_c(in, ptr, size); + dot = avpriv_scalarproduct_float_c(in, ptr, size); if (dot > optimal_gain) { optimal_gain = dot; best_hist_ptr = ptr; @@ -532,7 +532,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch, if (optimal_gain <= 0) return -1; - dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size); + dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size); if (dot <= 0) // would be 1.0 return -1; @@ -562,8 +562,8 @@ static float tilt_factor(const float *lpcs, int n_lpcs) { float rh0, rh1; - rh0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs); - rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1); + rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs); + rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1); return rh1 / rh0; } @@ -656,7 +656,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs, -1.8 * tilt_factor(coeffs, remainder - 1), coeffs, remainder); } - sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder)); + sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs, + remainder)); for (n = 0; n < remainder; n++) coeffs[n] *= sq; } @@ -1320,7 +1321,8 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, /* 
Calculate gain for adaptive & fixed codebook signal. * see ff_amr_set_fixed_gain(). */ idx = get_bits(gb, 7); - fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) - + fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err, + gain_coeff, 6) - 5.2409161640 + wmavoice_gain_codebook_fcb[idx]); acb_gain = wmavoice_gain_codebook_acb[idx]; pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx], diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index 27e77d565d..65f4b37d8f 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -463,32 +463,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left .src_unaligned: ADD_HFYU_LEFT_LOOP 0, 0 - -; float scalarproduct_float_sse(const float *v1, const float *v2, int len) -INIT_XMM sse -cglobal scalarproduct_float, 3,3,2, v1, v2, offset - neg offsetq - shl offsetq, 2 - sub v1q, offsetq - sub v2q, offsetq - xorps xmm0, xmm0 - .loop: - movaps xmm1, [v1q+offsetq] - mulps xmm1, [v2q+offsetq] - addps xmm0, xmm1 - add offsetq, 16 - js .loop - movhlps xmm1, xmm0 - addps xmm0, xmm1 - movss xmm1, xmm0 - shufps xmm0, xmm0, 1 - addss xmm0, xmm1 -%if ARCH_X86_64 == 0 - movss r0m, xmm0 - fld dword r0m -%endif - RET - ;----------------------------------------------------------------------------- ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, ; int32_t max, unsigned int len) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 503764817a..65247c0016 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1846,8 +1846,6 @@ int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); -float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); - void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); void ff_vector_clip_int32_sse2 
(int32_t *dst, const int32_t *src, @@ -2128,10 +2126,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) c->vector_clipf = vector_clipf_sse; #endif /* HAVE_INLINE_ASM */ - -#if HAVE_YASM - c->scalarproduct_float = ff_scalarproduct_float_sse; -#endif /* HAVE_YASM */ } static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index b3644e82a2..a7245ad92b 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0, void ff_butterflies_float_neon(float *v1, float *v2, int len); +float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; @@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) fdsp->vector_fmul_add = ff_vector_fmul_add_neon; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; fdsp->butterflies_float = ff_butterflies_float_neon; + fdsp->scalarproduct_float = ff_scalarproduct_float_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index 4acc406d33..559b565628 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1 bgt 1b bx lr endfunc + +function ff_scalarproduct_float_neon, export=1 + vmov.f32 q2, #0.0 +1: vld1.32 {q0},[r0,:128]! + vld1.32 {q1},[r1,:128]! 
+ vmla.f32 q2, q0, q1 + subs r2, r2, #4 + bgt 1b + vadd.f32 d0, d4, d5 + vpadd.f32 d0, d0, d0 +NOVFP vmov.32 r0, d0[0] + bx lr +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index c6e2b41d66..a40b029a29 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -101,6 +101,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2, } } +float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len) +{ + float p = 0.0; + int i; + + for (i = 0; i < len; i++) + p += v1[i] * v2[i]; + + return p; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; @@ -111,6 +122,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) fdsp->vector_fmul_add = vector_fmul_add_c; fdsp->vector_fmul_reverse = vector_fmul_reverse_c; fdsp->butterflies_float = butterflies_float_c; + fdsp->scalarproduct_float = avpriv_scalarproduct_float_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index ec57b36f79..f2b90a4848 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -146,8 +146,30 @@ typedef struct AVFloatDSPContext { * @param len length of vectors, multiple of 4 */ void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); + + /** + * Calculate the scalar product of two vectors of floats. + * + * @param v1 first vector, 16-byte aligned + * @param v2 second vector, 16-byte aligned + * @param len length of vectors, multiple of 4 + * + * @return sum of elementwise products + */ + float (*scalarproduct_float)(const float *v1, const float *v2, int len); } AVFloatDSPContext; +/** + * Return the scalar product of two vectors. 
+ * + * @param v1 first input vector + * @param v2 second input vector + * @param len number of elements + * + * @return sum of elementwise products + */ +float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); + /** * Initialize a float DSP context. * diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index 126f3495c4..779339c575 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -227,3 +227,28 @@ INIT_XMM sse VECTOR_FMUL_REVERSE INIT_YMM avx VECTOR_FMUL_REVERSE + +; float scalarproduct_float_sse(const float *v1, const float *v2, int len) +INIT_XMM sse +cglobal scalarproduct_float, 3,3,2, v1, v2, offset + neg offsetq + shl offsetq, 2 + sub v1q, offsetq + sub v2q, offsetq + xorps xmm0, xmm0 +.loop: + movaps xmm1, [v1q+offsetq] + mulps xmm1, [v2q+offsetq] + addps xmm0, xmm1 + add offsetq, 16 + js .loop + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xmm0 + fld dword r0m +%endif + RET diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 9f63b4c057..81c9a7d468 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0, void ff_vector_fmul_reverse_avx(float *dst, const float *src0, const float *src1, int len); +float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); + #if HAVE_6REGS && HAVE_INLINE_ASM static void vector_fmul_window_3dnowext(float *dst, const float *src0, const float *src1, const float *win, @@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; fdsp->vector_fmul_add = ff_vector_fmul_add_sse; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; + fdsp->scalarproduct_float = ff_scalarproduct_float_sse; } if (EXTERNAL_SSE2(mm_flags)) { fdsp->vector_dmul_scalar = 
ff_vector_dmul_scalar_sse2; From 73b704ac609d83e0be124589f24efd9b94947cf9 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 21 Jan 2013 10:16:02 +0100 Subject: [PATCH 2/2] arm: Add some missing header #includes --- libavcodec/arm/h264pred_init_arm.c | 1 + libavcodec/arm/vp3dsp_init_arm.c | 1 + libavcodec/arm/vp8dsp_init_arm.c | 1 + libavcodec/arm/vp8dsp_init_armv6.c | 2 ++ libavcodec/arm/vp8dsp_init_neon.c | 2 ++ 5 files changed, 7 insertions(+) diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index 39c012127d..0431fc8691 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -21,6 +21,7 @@ #include #include "libavutil/arm/cpu.h" +#include "libavcodec/avcodec.h" #include "libavcodec/h264pred.h" void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride); diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c index ea99bfd2b3..e9f3fd3f17 100644 --- a/libavcodec/arm/vp3dsp_init_arm.c +++ b/libavcodec/arm/vp3dsp_init_arm.c @@ -21,6 +21,7 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/arm/cpu.h" +#include "libavcodec/dsputil.h" #include "libavcodec/vp3dsp.h" void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c index 603f68cd24..b7897cd9fd 100644 --- a/libavcodec/arm/vp8dsp_init_arm.c +++ b/libavcodec/arm/vp8dsp_init_arm.c @@ -18,6 +18,7 @@ #include +#include "libavutil/attributes.h" #include "libavutil/arm/cpu.h" #include "libavcodec/vp8dsp.h" #include "vp8dsp.h" diff --git a/libavcodec/arm/vp8dsp_init_armv6.c b/libavcodec/arm/vp8dsp_init_armv6.c index 85a803af83..c3d024f5cb 100644 --- a/libavcodec/arm/vp8dsp_init_armv6.c +++ b/libavcodec/arm/vp8dsp_init_armv6.c @@ -17,6 +17,8 @@ */ #include + +#include "libavutil/attributes.h" #include "libavcodec/vp8dsp.h" #include "vp8dsp.h" diff --git a/libavcodec/arm/vp8dsp_init_neon.c 
b/libavcodec/arm/vp8dsp_init_neon.c index dbe5b9f961..965243c3e4 100644 --- a/libavcodec/arm/vp8dsp_init_neon.c +++ b/libavcodec/arm/vp8dsp_init_neon.c @@ -17,6 +17,8 @@ */ #include + +#include "libavutil/attributes.h" #include "libavcodec/vp8dsp.h" #include "vp8dsp.h"