FFmpeg/libavutil/float_dsp.c

/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "attributes.h"
#include "float_dsp.h"

/**
 * Multiply two float vectors element by element (portable C version).
 *
 * @param dst  output vector; dst[i] receives src0[i] * src1[i]
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements to process; nothing is written if len <= 0
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                          int len)
{
    int remaining;

    /* Walk all three vectors front to back in lock step. */
    for (remaining = len; remaining > 0; remaining--)
        *dst++ = *src0++ * *src1++;
}

/**
 * Multiply a float vector by a scalar and accumulate into dst
 * (portable C version).
 *
 * @param dst accumulator vector; dst[i] is increased by src[i] * mul
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements to process
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int pos = 0;

    while (pos < len) {
        dst[pos] += src[pos] * mul;
        pos++;
    }
}

/**
 * Scale a float vector by a scalar (portable C version).
 *
 * @param dst output vector; dst[i] receives src[i] * mul
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements to process
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int left;

    for (left = len; left > 0; left--)
        *dst++ = *src++ * mul;
}

/**
 * Scale a double vector by a scalar (portable C version).
 *
 * @param dst output vector; dst[i] receives src[i] * mul
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements to process
 */
static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    int n = 0;

    while (n < len) {
        dst[n] = src[n] * mul;
        n++;
    }
}

/**
 * Windowed overlap of two float vectors (portable C version).
 *
 * Reads len elements from src0 and from src1 and 2 * len elements from win,
 * and writes 2 * len elements to dst. src0 is traversed forwards while src1
 * is traversed backwards; each step produces one output in the lower half of
 * dst and its mirror position in the upper half.
 *
 * @param dst  output vector, 2 * len elements
 * @param src0 first input vector, len elements
 * @param src1 second input vector, len elements
 * @param win  window coefficients, 2 * len elements
 * @param len  half the output length
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    float       *dst_hi = dst + len; /* upper half of the output */
    const float *win_hi = win + len; /* upper half of the window */
    int k;

    for (k = 0; k < len; k++) {
        const int mirror = len - 1 - k;
        /* Load all four inputs before storing either output. */
        float s0 = src0[k];
        float s1 = src1[mirror];
        float wi = win[k];
        float wj = win_hi[mirror];

        dst[k]         = s0 * wj - s1 * wi;
        dst_hi[mirror] = s0 * wi + s1 * wj;
    }
}

/**
 * Element-wise multiply of two float vectors plus a third
 * (portable C version).
 *
 * @param dst  output vector; dst[i] receives src0[i] * src1[i] + src2[i]
 * @param src0 first factor vector
 * @param src1 second factor vector
 * @param src2 addend vector
 * @param len  number of elements to process
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int len)
{
    int remaining;

    for (remaining = len; remaining > 0; remaining--)
        *dst++ = *src0++ * *src1++ + *src2++;
}

/**
 * Multiply a float vector by a second vector read in reverse order
 * (portable C version).
 *
 * @param dst  output vector; dst[i] receives src0[i] * src1[len - 1 - i]
 * @param src0 first input vector, read front to back
 * @param src1 second input vector, read back to front
 * @param len  number of elements to process
 */
static void vector_fmul_reverse_c(float *dst, const float *src0,
                                  const float *src1, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[len - 1 - k];
        k++;
    }
}

/**
 * In-place sum/difference butterfly on two float vectors
 * (portable C version).
 *
 * After the call v1[i] holds the old v1[i] + v2[i] and v2[i] holds the old
 * v1[i] - v2[i]. The restrict qualifiers state that the two vectors do not
 * overlap.
 *
 * @param v1  first vector; replaced by the sums
 * @param v2  second vector; replaced by the differences
 * @param len number of elements to process
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}

/**
 * Dot product of two float vectors (portable C version).
 *
 * Products are accumulated in single precision, in index order.
 *
 * @param v1  first input vector
 * @param v2  second input vector
 * @param len number of elements to process
 * @return sum of v1[i] * v2[i] over all i in [0, len); 0 if len <= 0
 */
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int left;

    for (left = len; left > 0; left--)
        acc += *v1++ * *v2++;

    return acc;
}

/**
 * Populate an AVFloatDSPContext.
 *
 * Every function pointer is first set to the portable C routine from this
 * file; the context is then passed to the init function of the architecture
 * selected at build time, if any.
 *
 * @param fdsp      context whose function pointers are set
 * @param bit_exact in this function only forwarded to the PPC init routine
 */
av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
    /* C defaults, installed before any architecture-specific init runs. */
    fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
    fdsp->butterflies_float   = butterflies_float_c;
    fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
    fdsp->vector_fmul_add     = vector_fmul_add_c;
    fdsp->vector_fmul_window  = vector_fmul_window_c;
    fdsp->vector_dmul_scalar  = vector_dmul_scalar_c;
    fdsp->vector_fmul_scalar  = vector_fmul_scalar_c;
    fdsp->vector_fmac_scalar  = vector_fmac_scalar_c;
    fdsp->vector_fmul         = vector_fmul_c;

    /* At most one architecture hook is compiled in. */
#if   ARCH_AARCH64
    ff_float_dsp_init_aarch64(fdsp);
#elif ARCH_ARM
    ff_float_dsp_init_arm(fdsp);
#elif ARCH_PPC
    ff_float_dsp_init_ppc(fdsp, bit_exact);
#elif ARCH_X86
    ff_float_dsp_init_x86(fdsp);
#endif
}

#ifdef TEST

#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "cpu.h"
#include "lfg.h"
#include "log.h"
#include "mem.h"
#include "random_seed.h"

#define LEN 240

/**
 * Fill a float array with pseudo-random values drawn via av_bmg_get().
 *
 * Each av_bmg_get() call yields two values; both are scaled by stddev and
 * shifted by mean before being stored.
 *
 * @param lfg random generator state passed on to av_bmg_get()
 * @param a   array to fill
 * @param len number of elements in a; may be odd
 */
static void fill_float_array(AVLFG *lfg, float *a, int len)
{
    int i;
    double bmg[2], stddev = 10.0, mean = 0.0;

    for (i = 0; i < len; i += 2) {
        av_bmg_get(lfg, bmg);
        a[i] = bmg[0] * stddev + mean;
        /* With an odd len the last pair has only one slot left in a. */
        if (i + 1 < len)
            a[i + 1] = bmg[1] * stddev + mean;
    }
}
/**
 * Check that two float arrays agree element-wise within a tolerance.
 *
 * The first pair whose absolute difference exceeds max_diff is logged
 * (index, both values, and their difference) and ends the comparison.
 *
 * @param a        first array
 * @param b        second array
 * @param len      number of elements to compare
 * @param max_diff largest absolute difference still accepted
 * @return 0 if no pair exceeded max_diff, -1 otherwise
 */
static int compare_floats(const float *a, const float *b, int len,
                          float max_diff)
{
    int idx = 0;

    while (idx < len) {
        if (fabsf(a[idx] - b[idx]) > max_diff) {
            av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
                   idx, a[idx], b[idx], a[idx] - b[idx]);
            return -1;
        }
        idx++;
    }

    return 0;
}

/**
 * Fill a double array with pseudo-random values drawn via av_bmg_get().
 *
 * Each av_bmg_get() call yields two values; both are scaled by stddev and
 * shifted by mean before being stored.
 *
 * @param lfg random generator state passed on to av_bmg_get()
 * @param a   array to fill
 * @param len number of elements in a; may be odd
 */
static void fill_double_array(AVLFG *lfg, double *a, int len)
{
    int i;
    double bmg[2], stddev = 10.0, mean = 0.0;

    for (i = 0; i < len; i += 2) {
        av_bmg_get(lfg, bmg);
        a[i] = bmg[0] * stddev + mean;
        /* With an odd len the last pair has only one slot left in a. */
        if (i + 1 < len)
            a[i + 1] = bmg[1] * stddev + mean;
    }
}

/**
 * Check that two double arrays agree element-wise within a tolerance.
 *
 * The first pair whose absolute difference exceeds max_diff is logged
 * (index, both values, and their difference) and ends the comparison.
 *
 * @param a        first array
 * @param b        second array
 * @param len      number of elements to compare
 * @param max_diff largest absolute difference still accepted
 * @return 0 if no pair exceeded max_diff, -1 otherwise
 */
static int compare_doubles(const double *a, const double *b, int len,
                           double max_diff)
{
    int idx = 0;

    while (idx < len) {
        if (fabs(a[idx] - b[idx]) > max_diff) {
            av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",
                   idx, a[idx], b[idx], a[idx] - b[idx]);
            return -1;
        }
        idx++;
    }

    return 0;
}

/**
 * Run vector_fmul from both contexts on the same inputs and compare.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first input, LEN floats
 * @param v2   second input, LEN floats
 * @return 0 if all LEN outputs agree within FLT_EPSILON, otherwise the
 *         non-zero result of compare_floats()
 */
static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                            const float *v1, const float *v2)
{
    DECLARE_ALIGNED(32, float, ref_out)[LEN];
    DECLARE_ALIGNED(32, float, tst_out)[LEN];
    int ret;

    cdsp->vector_fmul(ref_out, v1, v2, LEN);
    fdsp->vector_fmul(tst_out, v1, v2, LEN);

    ret = compare_floats(ref_out, tst_out, LEN, FLT_EPSILON);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

#define ARBITRARY_FMAC_SCALAR_CONST 0.005
/**
 * Run vector_fmac_scalar from both contexts on identical accumulators and
 * compare the results.
 *
 * @param fdsp  context whose output is checked
 * @param cdsp  context whose output serves as the reference
 * @param v1    initial accumulator contents, LEN floats (copied, not modified)
 * @param src0  vector that is scaled and accumulated, LEN floats
 * @param scale scalar factor
 * @return 0 if all LEN outputs agree within ARBITRARY_FMAC_SCALAR_CONST,
 *         otherwise the non-zero result of compare_floats()
 */
static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                   const float *v1, const float *src0, float scale)
{
    DECLARE_ALIGNED(32, float, ref_acc)[LEN];
    DECLARE_ALIGNED(32, float, tst_acc)[LEN];
    int ret;

    /* The operation works in place, so each context gets its own copy. */
    memcpy(ref_acc, v1, sizeof(v1[0]) * LEN);
    memcpy(tst_acc, v1, sizeof(v1[0]) * LEN);

    cdsp->vector_fmac_scalar(ref_acc, src0, scale, LEN);
    fdsp->vector_fmac_scalar(tst_acc, src0, scale, LEN);

    ret = compare_floats(ref_acc, tst_acc, LEN, ARBITRARY_FMAC_SCALAR_CONST);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

/**
 * Run vector_fmul_scalar from both contexts on the same input and compare.
 *
 * @param fdsp  context whose output is checked
 * @param cdsp  context whose output serves as the reference
 * @param v1    input vector, LEN floats
 * @param scale scalar factor
 * @return 0 if all LEN outputs agree within FLT_EPSILON, otherwise the
 *         non-zero result of compare_floats()
 */
static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                   const float *v1, float scale)
{
    DECLARE_ALIGNED(32, float, ref_out)[LEN];
    DECLARE_ALIGNED(32, float, tst_out)[LEN];
    int ret;

    cdsp->vector_fmul_scalar(ref_out, v1, scale, LEN);
    fdsp->vector_fmul_scalar(tst_out, v1, scale, LEN);

    ret = compare_floats(ref_out, tst_out, LEN, FLT_EPSILON);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

/**
 * Run vector_dmul_scalar from both contexts on the same input and compare.
 *
 * @param fdsp  context whose output is checked
 * @param cdsp  context whose output serves as the reference
 * @param v1    input vector, LEN doubles
 * @param scale scalar factor
 * @return 0 if all LEN outputs agree within DBL_EPSILON, otherwise the
 *         non-zero result of compare_doubles()
 */
static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                   const double *v1, double scale)
{
    DECLARE_ALIGNED(32, double, ref_out)[LEN];
    DECLARE_ALIGNED(32, double, tst_out)[LEN];
    int ret;

    cdsp->vector_dmul_scalar(ref_out, v1, scale, LEN);
    fdsp->vector_dmul_scalar(tst_out, v1, scale, LEN);

    ret = compare_doubles(ref_out, tst_out, LEN, DBL_EPSILON);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

#define ARBITRARY_FMUL_WINDOW_CONST 0.008
/**
 * Run vector_fmul_window from both contexts on the same inputs and compare.
 *
 * The routine is called with a length of LEN / 2 and all LEN output
 * elements are compared.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first input vector
 * @param v2   second input vector
 * @param v3   window coefficients
 * @return 0 if all LEN outputs agree within ARBITRARY_FMUL_WINDOW_CONST,
 *         otherwise the non-zero result of compare_floats()
 */
static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                   const float *v1, const float *v2, const float *v3)
{
    DECLARE_ALIGNED(32, float, ref_out)[LEN];
    DECLARE_ALIGNED(32, float, tst_out)[LEN];
    int ret;

    cdsp->vector_fmul_window(ref_out, v1, v2, v3, LEN / 2);
    fdsp->vector_fmul_window(tst_out, v1, v2, v3, LEN / 2);

    ret = compare_floats(ref_out, tst_out, LEN, ARBITRARY_FMUL_WINDOW_CONST);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

#define ARBITRARY_FMUL_ADD_CONST 0.005
/**
 * Run vector_fmul_add from both contexts on the same inputs and compare.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first factor vector, LEN floats
 * @param v2   second factor vector, LEN floats
 * @param v3   addend vector, LEN floats
 * @return 0 if all LEN outputs agree within ARBITRARY_FMUL_ADD_CONST,
 *         otherwise the non-zero result of compare_floats()
 */
static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                const float *v1, const float *v2, const float *v3)
{
    DECLARE_ALIGNED(32, float, ref_out)[LEN];
    DECLARE_ALIGNED(32, float, tst_out)[LEN];
    int ret;

    cdsp->vector_fmul_add(ref_out, v1, v2, v3, LEN);
    fdsp->vector_fmul_add(tst_out, v1, v2, v3, LEN);

    ret = compare_floats(ref_out, tst_out, LEN, ARBITRARY_FMUL_ADD_CONST);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

/**
 * Run vector_fmul_reverse from both contexts on the same inputs and compare.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first input vector, LEN floats
 * @param v2   second input vector, LEN floats
 * @return 0 if all LEN outputs agree within FLT_EPSILON, otherwise the
 *         non-zero result of compare_floats()
 */
static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                    const float *v1, const float *v2)
{
    DECLARE_ALIGNED(32, float, ref_out)[LEN];
    DECLARE_ALIGNED(32, float, tst_out)[LEN];
    int ret;

    cdsp->vector_fmul_reverse(ref_out, v1, v2, LEN);
    fdsp->vector_fmul_reverse(tst_out, v1, v2, LEN);

    ret = compare_floats(ref_out, tst_out, LEN, FLT_EPSILON);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

/**
 * Run butterflies_float from both contexts on identical copies of the
 * inputs and compare both resulting vectors.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first input vector, LEN floats (copied, not modified)
 * @param v2   second input vector, LEN floats (copied, not modified)
 * @return 0 if both output vectors agree within FLT_EPSILON, otherwise the
 *         non-zero result of the first failing compare_floats() call
 */
static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                  const float *v1, const float *v2)
{
    DECLARE_ALIGNED(32, float, ref_v1)[LEN];
    DECLARE_ALIGNED(32, float, ref_v2)[LEN];
    DECLARE_ALIGNED(32, float, tst_v1)[LEN];
    DECLARE_ALIGNED(32, float, tst_v2)[LEN];
    int ret;

    /* The operation works in place, so each context gets its own copies. */
    memcpy(ref_v1, v1, sizeof(v1[0]) * LEN);
    memcpy(ref_v2, v2, sizeof(v2[0]) * LEN);
    memcpy(tst_v1, v1, sizeof(v1[0]) * LEN);
    memcpy(tst_v2, v2, sizeof(v2[0]) * LEN);

    cdsp->butterflies_float(ref_v1, ref_v2, LEN);
    fdsp->butterflies_float(tst_v1, tst_v2, LEN);

    /* The second vector is only compared when the first one matched. */
    ret = compare_floats(ref_v1, tst_v1, LEN, FLT_EPSILON);
    if (ret == 0)
        ret = compare_floats(ref_v2, tst_v2, LEN, FLT_EPSILON);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

#define ARBITRARY_SCALARPRODUCT_CONST 0.2
/**
 * Run scalarproduct_float from both contexts on the same inputs and compare
 * the two scalar results.
 *
 * @param fdsp context whose output is checked
 * @param cdsp context whose output serves as the reference
 * @param v1   first input vector, LEN floats
 * @param v2   second input vector, LEN floats
 * @return 0 if the results agree within ARBITRARY_SCALARPRODUCT_CONST,
 *         otherwise the non-zero result of compare_floats()
 */
static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
                                    const float *v1, const float *v2)
{
    float ref_prod, tst_prod;
    int ret;

    ref_prod = cdsp->scalarproduct_float(v1, v2, LEN);
    tst_prod = fdsp->scalarproduct_float(v1, v2, LEN);

    /* Reuse the array comparison on the two scalars as 1-element arrays. */
    ret = compare_floats(&ref_prod, &tst_prod, 1, ARBITRARY_SCALARPRODUCT_CONST);
    if (ret != 0)
        av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);

    return ret;
}

/**
 * Test driver: compares every AVFloatDSPContext routine of two contexts on
 * pseudo-random input.
 *
 * Usage: the optional arguments "-s <seed>" select the random seed (parsed
 * as a base-10 number); otherwise av_get_random_seed() provides one. The
 * seed in use is logged so that a run can be reproduced.
 *
 * fdsp is initialised before av_set_cpu_flags_mask(0) is called and cdsp
 * afterwards.
 * NOTE(review): presumably the zero mask makes cdsp keep the plain C
 * routines so that it can serve as reference - confirm against cpu.h.
 *
 * @return 0 if every test passed; otherwise a non-zero value. Each failed
 *         test subtracts its own bit (1 << n) from the result.
 */
int main(int argc, char **argv)
{
    int ret = 0;
    uint32_t seed;
    AVFloatDSPContext fdsp, cdsp;
    AVLFG lfg;

    DECLARE_ALIGNED(32, float, src0)[LEN];
    DECLARE_ALIGNED(32, float, src1)[LEN];
    DECLARE_ALIGNED(32, float, src2)[LEN];
    DECLARE_ALIGNED(32, double, dbl_src0)[LEN];
    DECLARE_ALIGNED(32, double, dbl_src1)[LEN];

    if (argc > 2 && !strcmp(argv[1], "-s"))
        seed = strtoul(argv[2], NULL, 10);
    else
        seed = av_get_random_seed();

    av_log(NULL, AV_LOG_INFO, "float_dsp-test: random seed %u\n", seed);

    av_lfg_init(&lfg, seed);

    fill_float_array(&lfg, src0, LEN);
    fill_float_array(&lfg, src1, LEN);
    fill_float_array(&lfg, src2, LEN);

    fill_double_array(&lfg, dbl_src0, LEN);
    fill_double_array(&lfg, dbl_src1, LEN);

    avpriv_float_dsp_init(&fdsp, 1);
    av_set_cpu_flags_mask(0);
    avpriv_float_dsp_init(&cdsp, 1);

    if (test_vector_fmul(&fdsp, &cdsp, src0, src1))
        ret -= 1 << 0;
    if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))
        ret -= 1 << 1;
    if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))
        ret -= 1 << 2;
    if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))
        ret -= 1 << 3;
    if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))
        ret -= 1 << 4;
    if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))
        ret -= 1 << 5;
    if (test_butterflies_float(&fdsp, &cdsp, src0, src1))
        ret -= 1 << 6;
    if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))
        ret -= 1 << 7;
    if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))
        ret -= 1 << 8;

    /*
     * On POSIX systems only the low 8 bits of the value returned from main
     * reach the parent as exit status. If vector_dmul_scalar is the only
     * failing test, ret is -(1 << 8), whose low byte is 0, and the run would
     * be reported as a success. Report a plain failure in that case; all
     * other values are returned unchanged.
     */
    if (ret && !(ret & 0xFF))
        return 1;

    return ret;
}

#endif /* TEST */
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`/*`
			`* This file is part of Libav.`
			`*`
			`* Libav is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* Libav is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with Libav; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include "config.h"`
Drop pointless directory name prefixes from #includes in the current dir 12 years ago			`#include "attributes.h"`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`#include "float_dsp.h"`

			`static void vector_fmul_c(float *dst, const float *src0, const float *src1,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[i];`
			`}`

float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil 13 years ago			`static void vector_fmac_scalar_c(float *dst, const float *src, float mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] += src[i] * mul;`
			`}`

dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil 13 years ago			`static void vector_fmul_scalar_c(float *dst, const float *src, float mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src[i] * mul;`
			`}`

float_dsp: add vector_dmul_scalar() to multiply a vector of doubles Include x86-optimized versions for SSE2 and AVX. 13 years ago			`static void vector_dmul_scalar_c(double *dst, const double *src, double mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src[i] * mul;`
			`}`

lavc: Move vector_fmul_window to AVFloatDSPContext Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`static void vector_fmul_window_c(float *dst, const float *src0,`
			`const float *src1, const float *win, int len)`
			`{`
			`int i, j;`

			`dst += len;`
			`win += len;`
			`src0 += len;`

			`for (i = -len, j = len - 1; i < 0; i++, j--) {`
			`float s0 = src0[i];`
			`float s1 = src1[j];`
			`float wi = win[i];`
			`float wj = win[j];`
			`dst[i] = s0 * wj - s1 * wi;`
			`dst[j] = s0 * wi + s1 * wj;`
			`}`
			`}`

floatdsp: move vector_fmul_add from dsputil to avfloatdsp. 12 years ago			`static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,`
			`const float *src2, int len){`
			`int i;`

			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[i] + src2[i];`
			`}`

floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp. Now, nellymoserenc and aacenc no longer depends on dsputil. Independent of this patch, wmaprodec also does not depend on dsputil, so I removed it from there also. 12 years ago			`static void vector_fmul_reverse_c(float *dst, const float *src0,`
			`const float *src1, int len)`
			`{`
			`int i;`

			`src1 += len-1;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[-i];`
			`}`

floatdsp: move butterflies_float from dsputil to avfloatdsp. This makes wmadec/enc, twinvq and mpegaudiodec (i.e. mp2/mp3) independent of dsputil. 12 years ago			`static void butterflies_float_c(float *restrict v1, float *restrict v2,`
			`int len)`
			`{`
			`int i;`

			`for (i = 0; i < len; i++) {`
			`float t = v1[i] - v2[i];`
			`v1[i] += v2[i];`
			`v2[i] = t;`
			`}`
			`}`

floatdsp: move scalarproduct_float from dsputil to avfloatdsp. This makes the aac decoder and all voice codecs independent of dsputil. 12 years ago			`float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)`
			`{`
			`float p = 0.0;`
			`int i;`

			`for (i = 0; i < len; i++)`
			`p += v1[i] * v2[i];`

			`return p;`
			`}`

avutil: Add av_cold attributes to init functions missing them 12 years ago			`av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`{`
			`fdsp->vector_fmul = vector_fmul_c;`
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil 13 years ago			`fdsp->vector_fmac_scalar = vector_fmac_scalar_c;`
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil 13 years ago			`fdsp->vector_fmul_scalar = vector_fmul_scalar_c;`
float_dsp: add vector_dmul_scalar() to multiply a vector of doubles Include x86-optimized versions for SSE2 and AVX. 13 years ago			`fdsp->vector_dmul_scalar = vector_dmul_scalar_c;`
lavc: Move vector_fmul_window to AVFloatDSPContext Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`fdsp->vector_fmul_window = vector_fmul_window_c;`
floatdsp: move vector_fmul_add from dsputil to avfloatdsp. 12 years ago			`fdsp->vector_fmul_add = vector_fmul_add_c;`
floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp. Now, nellymoserenc and aacenc no longer depends on dsputil. Independent of this patch, wmaprodec also does not depend on dsputil, so I removed it from there also. 12 years ago			`fdsp->vector_fmul_reverse = vector_fmul_reverse_c;`
floatdsp: move butterflies_float from dsputil to avfloatdsp. This makes wmadec/enc, twinvq and mpegaudiodec (i.e. mp2/mp3) independent of dsputil. 12 years ago			`fdsp->butterflies_float = butterflies_float_c;`
floatdsp: move scalarproduct_float from dsputil to avfloatdsp. This makes the aac decoder and all voice codecs independent of dsputil. 12 years ago			`fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago
aarch64: float_dsp NEON assembler Ported from arm NEON and added vector_dmul_scalar. Functions between 1.5 and 5 times faster than the C implementations using Apple's clang-503.0.19 on A7. 11 years ago			`#if ARCH_AARCH64`
			`ff_float_dsp_init_aarch64(fdsp);`
			`#elif ARCH_ARM`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`ff_float_dsp_init_arm(fdsp);`
			`#elif ARCH_PPC`
			`ff_float_dsp_init_ppc(fdsp, bit_exact);`
			`#elif ARCH_X86`
			`ff_float_dsp_init_x86(fdsp);`
			`#endif`
			`}`
float_dsp: add test program and use it as fate test 11 years ago
			`#ifdef TEST`

			`#include <float.h>`
			`#include <math.h>`
			`#include <stdint.h>`
			`#include <stdlib.h>`
			`#include <string.h>`

			`#include "cpu.h"`
			`#include "lfg.h"`
			`#include "log.h"`
			`#include "mem.h"`
			`#include "random_seed.h"`

			`#define LEN 240`

			`static void fill_float_array(AVLFG *lfg, float *a, int len)`
			`{`
			`int i;`
			`double bmg[2], stddev = 10.0, mean = 0.0;`

			`for (i = 0; i < len; i += 2) {`
			`av_bmg_get(lfg, bmg);`
			`a[i] = bmg[0] * stddev + mean;`
			`a[i + 1] = bmg[1] * stddev + mean;`
			`}`
			`}`
			`static int compare_floats(const float *a, const float *b, int len,`
			`float max_diff)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++) {`
			`if (fabsf(a[i] - b[i]) > max_diff) {`
			`av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",`
			`i, a[i], b[i], a[i] - b[i]);`
			`return -1;`
			`}`
			`}`
			`return 0;`
			`}`

			`static void fill_double_array(AVLFG *lfg, double *a, int len)`
			`{`
			`int i;`
			`double bmg[2], stddev = 10.0, mean = 0.0;`

			`for (i = 0; i < len; i += 2) {`
			`av_bmg_get(lfg, bmg);`
			`a[i] = bmg[0] * stddev + mean;`
			`a[i + 1] = bmg[1] * stddev + mean;`
			`}`
			`}`

			`static int compare_doubles(const double *a, const double *b, int len,`
			`double max_diff)`
			`{`
			`int i;`

			`for (i = 0; i < len; i++) {`
			`if (fabs(a[i] - b[i]) > max_diff) {`
			`av_log(NULL, AV_LOG_ERROR, "%d: %- .12f - %- .12f = % .12g\n",`
			`i, a[i], b[i], a[i] - b[i]);`
			`return -1;`
			`}`
			`}`
			`return 0;`
			`}`

			`static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`cdsp->vector_fmul(cdst, v1, v2, LEN);`
			`fdsp->vector_fmul(odst, v1, v2, LEN);`

			`if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`#define ARBITRARY_FMAC_SCALAR_CONST 0.005`
			`static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *src0, float scale)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`memcpy(cdst, v1, LEN * sizeof(*v1));`
			`memcpy(odst, v1, LEN * sizeof(*v1));`

			`cdsp->vector_fmac_scalar(cdst, src0, scale, LEN);`
			`fdsp->vector_fmac_scalar(odst, src0, scale, LEN);`

			`if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMAC_SCALAR_CONST))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, float scale)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`cdsp->vector_fmul_scalar(cdst, v1, scale, LEN);`
			`fdsp->vector_fmul_scalar(odst, v1, scale, LEN);`

			`if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const double *v1, double scale)`
			`{`
			`DECLARE_ALIGNED(32, double, cdst)[LEN];`
			`DECLARE_ALIGNED(32, double, odst)[LEN];`
			`int ret;`

			`cdsp->vector_dmul_scalar(cdst, v1, scale, LEN);`
			`fdsp->vector_dmul_scalar(odst, v1, scale, LEN);`

			`if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`#define ARBITRARY_FMUL_WINDOW_CONST 0.008`
			`static int test_vector_fmul_window(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2, const float *v3)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`cdsp->vector_fmul_window(cdst, v1, v2, v3, LEN / 2);`
			`fdsp->vector_fmul_window(odst, v1, v2, v3, LEN / 2);`

			`if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_WINDOW_CONST))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`#define ARBITRARY_FMUL_ADD_CONST 0.005`
			`static int test_vector_fmul_add(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2, const float *v3)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`cdsp->vector_fmul_add(cdst, v1, v2, v3, LEN);`
			`fdsp->vector_fmul_add(odst, v1, v2, v3, LEN);`

			`if (ret = compare_floats(cdst, odst, LEN, ARBITRARY_FMUL_ADD_CONST))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`static int test_vector_fmul_reverse(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2)`
			`{`
			`DECLARE_ALIGNED(32, float, cdst)[LEN];`
			`DECLARE_ALIGNED(32, float, odst)[LEN];`
			`int ret;`

			`cdsp->vector_fmul_reverse(cdst, v1, v2, LEN);`
			`fdsp->vector_fmul_reverse(odst, v1, v2, LEN);`

			`if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`static int test_butterflies_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2)`
			`{`
			`DECLARE_ALIGNED(32, float, cv1)[LEN];`
			`DECLARE_ALIGNED(32, float, cv2)[LEN];`
			`DECLARE_ALIGNED(32, float, ov1)[LEN];`
			`DECLARE_ALIGNED(32, float, ov2)[LEN];`
			`int ret;`

			`memcpy(cv1, v1, LEN * sizeof(*v1));`
			`memcpy(cv2, v2, LEN * sizeof(*v2));`
			`memcpy(ov1, v1, LEN * sizeof(*v1));`
			`memcpy(ov2, v2, LEN * sizeof(*v2));`

			`cdsp->butterflies_float(cv1, cv2, LEN);`
			`fdsp->butterflies_float(ov1, ov2, LEN);`

			`if ((ret = compare_floats(cv1, ov1, LEN, FLT_EPSILON)) ||`
			`(ret = compare_floats(cv2, ov2, LEN, FLT_EPSILON)))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`#define ARBITRARY_SCALARPRODUCT_CONST 0.2`
			`static int test_scalarproduct_float(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,`
			`const float *v1, const float *v2)`
			`{`
			`float cprod, oprod;`
			`int ret;`

			`cprod = cdsp->scalarproduct_float(v1, v2, LEN);`
			`oprod = fdsp->scalarproduct_float(v1, v2, LEN);`

			`if (ret = compare_floats(&cprod, &oprod, 1, ARBITRARY_SCALARPRODUCT_CONST))`
			`av_log(NULL, AV_LOG_ERROR, "%s failed\n", __func__);`

			`return ret;`
			`}`

			`int main(int argc, char **argv)`
			`{`
			`int ret = 0;`
			`uint32_t seed;`
			`AVFloatDSPContext fdsp, cdsp;`
			`AVLFG lfg;`

			`DECLARE_ALIGNED(32, float, src0)[LEN];`
			`DECLARE_ALIGNED(32, float, src1)[LEN];`
			`DECLARE_ALIGNED(32, float, src2)[LEN];`
			`DECLARE_ALIGNED(32, double, dbl_src0)[LEN];`
			`DECLARE_ALIGNED(32, double, dbl_src1)[LEN];`

			`if (argc > 2 && !strcmp(argv[1], "-s"))`
			`seed = strtoul(argv[2], NULL, 10);`
			`else`
			`seed = av_get_random_seed();`

			`av_log(NULL, AV_LOG_INFO, "float_dsp-test: random seed %u\n", seed);`

			`av_lfg_init(&lfg, seed);`

			`fill_float_array(&lfg, src0, LEN);`
			`fill_float_array(&lfg, src1, LEN);`
			`fill_float_array(&lfg, src2, LEN);`

			`fill_double_array(&lfg, dbl_src0, LEN);`
			`fill_double_array(&lfg, dbl_src1, LEN);`

			`avpriv_float_dsp_init(&fdsp, 1);`
			`av_set_cpu_flags_mask(0);`
			`avpriv_float_dsp_init(&cdsp, 1);`

			`if (test_vector_fmul(&fdsp, &cdsp, src0, src1))`
			`ret -= 1 << 0;`
			`if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))`
			`ret -= 1 << 1;`
			`if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))`
			`ret -= 1 << 2;`
			`if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))`
			`ret -= 1 << 3;`
			`if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))`
			`ret -= 1 << 4;`
			`if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))`
			`ret -= 1 << 5;`
			`if (test_butterflies_float(&fdsp, &cdsp, src0, src1))`
			`ret -= 1 << 6;`
			`if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))`
			`ret -= 1 << 7;`
			`if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))`
			`ret -= 1 << 8;`

			`return ret;`
			`}`

			`#endif /* TEST */`