FFmpeg/libavutil/float_dsp.c

/*
 * Copyright 2005 Balatoni Denes
 * Copyright 2006 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "attributes.h"
#include "float_dsp.h"
#include "mem.h"

/**
 * Multiply two float vectors element by element.
 *
 * Stores dst[k] = src0[k] * src1[k] for every k in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements to process
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                          int len)
{
    for (int k = 0; k < len; k++) {
        const float lhs = src0[k];
        const float rhs = src1[k];

        dst[k] = lhs * rhs;
    }
}

/**
 * Multiply two double vectors element by element.
 *
 * Stores dst[k] = src0[k] * src1[k] for every k in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements to process
 */
static void vector_dmul_c(double *dst, const double *src0, const double *src1,
                          int len)
{
    int remaining;

    /* Walk all three vectors in lock-step, front to back. */
    for (remaining = len; remaining > 0; remaining--) {
        *dst = *src0 * *src1;
        dst++;
        src0++;
        src1++;
    }
}

/**
 * Multiply a float vector by a scalar and accumulate into dst.
 *
 * Performs dst[k] += src[k] * mul for every k in [0, len).
 * dst is left untouched when len is zero or negative.
 *
 * @param dst accumulator vector, updated in place
 * @param src input vector
 * @param mul scalar factor applied to each element of src
 * @param len number of elements to process
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int left = len;

    while (left > 0) {
        *dst += *src * mul;
        dst++;
        src++;
        left--;
    }
}

/**
 * Multiply a double vector by a scalar and accumulate into dst.
 *
 * Performs dst[k] += src[k] * mul for every k in [0, len).
 * dst is left untouched when len is zero or negative.
 *
 * @param dst accumulator vector, updated in place
 * @param src input vector
 * @param mul scalar factor applied to each element of src
 * @param len number of elements to process
 */
static void vector_dmac_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    for (int k = 0; k < len; ++k) {
        dst[k] = dst[k] + src[k] * mul;
    }
}

/**
 * Scale a float vector by a scalar.
 *
 * Stores dst[k] = src[k] * mul for every k in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst output vector
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements to process
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}

/**
 * Scale a double vector by a scalar.
 *
 * Stores dst[k] = src[k] * mul for every k in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst output vector
 * @param src input vector
 * @param mul scalar factor
 * @param len number of elements to process
 */
static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    int todo;

    for (todo = len; todo > 0; todo--) {
        *dst = *src * mul;
        dst++;
        src++;
    }
}

/**
 * Windowed combination of two float vectors.
 *
 * dst and win are accessed over 2 * len elements, src0 and src1 over len
 * elements. For each k in [0, len), with m = 2 * len - 1 - k as the
 * mirrored position:
 *
 *     dst[k] = src0[k] * win[m] - src1[len - 1 - k] * win[k]
 *     dst[m] = src0[k] * win[k] + src1[len - 1 - k] * win[m]
 *
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector (2 * len elements)
 * @param src0 first input, read front to back
 * @param src1 second input, read back to front
 * @param win  window coefficients (2 * len elements)
 * @param len  number of elements in each of src0 and src1
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    /* Rebase onto the midpoint so both halves are reached by signed offsets. */
    float       *out_mid = dst  + len;
    const float *win_mid = win  + len;
    const float *in0_end = src0 + len;

    for (int step = 0; step < len; step++) {
        const int lo = step - len;      /* offset into the lower half */
        const int hi = len - 1 - step;  /* mirrored offset in the upper half */

        /* Read every operand before storing either result. */
        const float a    = in0_end[lo];
        const float b    = src1[hi];
        const float w_lo = win_mid[lo];
        const float w_hi = win_mid[hi];

        out_mid[lo] = a * w_hi - b * w_lo;
        out_mid[hi] = a * w_lo + b * w_hi;
    }
}

/**
 * Element-wise multiply of two float vectors, then add a third.
 *
 * Stores dst[k] = src0[k] * src1[k] + src2[k] for every k in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of elements to process
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        ++k;
    }
}

/**
 * Multiply a float vector by a second vector read in reverse order.
 *
 * Stores dst[i] = src0[i] * src1[len - 1 - i] for every i in [0, len).
 * Nothing is written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first input, read front to back
 * @param src1 second input, read back to front
 * @param len  number of elements to process
 */
static void vector_fmul_reverse_c(float *dst, const float *src0,
                                  const float *src1, int len)
{
    int i;

    /*
     * Index src1 from its start rather than pre-advancing it to
     * src1 + len - 1: for len == 0 that would form a pointer one element
     * before the array, which is undefined even if never dereferenced.
     */
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}

/**
 * In-place sum/difference butterfly on two float vectors.
 *
 * For every k in [0, len), v1[k] becomes v1[k] + v2[k] and v2[k] becomes
 * v1[k] - v2[k], both computed from the values held before the update.
 * The restrict qualifiers mean the two vectors must not overlap.
 *
 * @param v1  first vector; receives the sums
 * @param v2  second vector; receives the differences
 * @param len number of elements to process
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    for (int k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}

/**
 * Dot product of two float vectors.
 *
 * Products are accumulated in a single float, in index order from 0 to
 * len - 1.
 *
 * @param v1  first input vector
 * @param v2  second input vector
 * @param len number of elements to process
 * @return sum of v1[k] * v2[k] over k in [0, len); 0 when len <= 0
 */
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}

/**
 * Dot product of two double vectors.
 *
 * Products are accumulated in a single double, in index order from 0 to
 * len - 1.
 *
 * @param v1  first input vector
 * @param v2  second input vector
 * @param len number of elements to process
 * @return sum of v1[k] * v2[k] over k in [0, len); 0 when len is 0
 */
double ff_scalarproduct_double_c(const double *v1, const double *v2,
                                 size_t len)
{
    double acc = 0.0;
    size_t k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}

/**
 * Allocate an AVFloatDSPContext and fill in its function pointers.
 *
 * Every entry is first set to the portable C implementation from this file.
 * The initialiser for the architecture selected at build time (at most one
 * of the ARCH_* branches below) is then called on the context.
 *
 * @param bit_exact forwarded only to the PPC initialiser; the other
 *                  branches do not use it
 * @return the newly allocated context, or NULL if the allocation failed
 */
av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
{
    AVFloatDSPContext *ctx = av_mallocz(sizeof(*ctx));

    if (ctx == NULL)
        return NULL;

    /* Single-precision C fallbacks. */
    ctx->vector_fmul          = vector_fmul_c;
    ctx->vector_fmac_scalar   = vector_fmac_scalar_c;
    ctx->vector_fmul_scalar   = vector_fmul_scalar_c;
    ctx->vector_fmul_window   = vector_fmul_window_c;
    ctx->vector_fmul_add      = vector_fmul_add_c;
    ctx->vector_fmul_reverse  = vector_fmul_reverse_c;
    ctx->butterflies_float    = butterflies_float_c;
    ctx->scalarproduct_float  = avpriv_scalarproduct_float_c;

    /* Double-precision C fallbacks. */
    ctx->vector_dmul          = vector_dmul_c;
    ctx->vector_dmac_scalar   = vector_dmac_scalar_c;
    ctx->vector_dmul_scalar   = vector_dmul_scalar_c;
    ctx->scalarproduct_double = ff_scalarproduct_double_c;

    /* Architecture-specific initialisation; only one branch is compiled in. */
#if ARCH_AARCH64
    ff_float_dsp_init_aarch64(ctx);
#elif ARCH_ARM
    ff_float_dsp_init_arm(ctx);
#elif ARCH_PPC
    ff_float_dsp_init_ppc(ctx, bit_exact);
#elif ARCH_RISCV
    ff_float_dsp_init_riscv(ctx);
#elif ARCH_X86
    ff_float_dsp_init_x86(ctx);
#elif ARCH_MIPS
    ff_float_dsp_init_mips(ctx);
#endif

    return ctx;
}
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`/*`
float_dsp.c: Restore author attribution that was removed by libav while moving code to libavutil Original code comes from: commit eb4825b5d43bb6ecfae4d64688f9e2d2ac075263 Author: Loren Merritt <lorenm@u.washington.edu> Date: Thu Aug 10 19:06:25 2006 +0000 sse and 3dnow implementations of float->int conversion and mdct windowing. 15% faster vorbis. and commit 0bde73d907dbe684e01bafc979bf4a04ddeace1d Author: Michael Niedermayer <michaelni@gmx.at> Date: Tue May 17 19:02:43 2005 +0000 Vorbis decoder by (Balatoni Denes \| dbalatoni programozo hu) Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 13 years ago			`* Copyright 2005 Balatoni Denes`
			`* Copyright 2006 Loren Merritt`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`*`
float_dsp.c: Restore author attribution that was removed by libav while moving code to libavutil Original code comes from: commit eb4825b5d43bb6ecfae4d64688f9e2d2ac075263 Author: Loren Merritt <lorenm@u.washington.edu> Date: Thu Aug 10 19:06:25 2006 +0000 sse and 3dnow implementations of float->int conversion and mdct windowing. 15% faster vorbis. and commit 0bde73d907dbe684e01bafc979bf4a04ddeace1d Author: Michael Niedermayer <michaelni@gmx.at> Date: Tue May 17 19:02:43 2005 +0000 Vorbis decoder by (Balatoni Denes \| dbalatoni programozo hu) Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 13 years ago			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
float_dsp.c: Restore author attribution that was removed by libav while moving code to libavutil Original code comes from: commit eb4825b5d43bb6ecfae4d64688f9e2d2ac075263 Author: Loren Merritt <lorenm@u.washington.edu> Date: Thu Aug 10 19:06:25 2006 +0000 sse and 3dnow implementations of float->int conversion and mdct windowing. 15% faster vorbis. and commit 0bde73d907dbe684e01bafc979bf4a04ddeace1d Author: Michael Niedermayer <michaelni@gmx.at> Date: Tue May 17 19:02:43 2005 +0000 Vorbis decoder by (Balatoni Denes \| dbalatoni programozo hu) Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 13 years ago			`* FFmpeg is distributed in the hope that it will be useful,`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
float_dsp.c: Restore author attribution that was removed by libav while moving code to libavutil Original code comes from: commit eb4825b5d43bb6ecfae4d64688f9e2d2ac075263 Author: Loren Merritt <lorenm@u.washington.edu> Date: Thu Aug 10 19:06:25 2006 +0000 sse and 3dnow implementations of float->int conversion and mdct windowing. 15% faster vorbis. and commit 0bde73d907dbe684e01bafc979bf4a04ddeace1d Author: Michael Niedermayer <michaelni@gmx.at> Date: Tue May 17 19:02:43 2005 +0000 Vorbis decoder by (Balatoni Denes \| dbalatoni programozo hu) Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 13 years ago			`* License along with FFmpeg; if not, write to the Free Software`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include "config.h"`
Drop pointless directory name prefixes from #includes in the current dir 11 years ago			`#include "attributes.h"`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`#include "float_dsp.h"`
avutil/float_dsp: add avpriv_float_dsp_alloc() Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 10 years ago			`#include "mem.h"`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago
			`static void vector_fmul_c(float *dst, const float *src0, const float *src1,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[i];`
			`}`

avutil: add float_dsp.vector_dmul 6 years ago			`static void vector_dmul_c(double *dst, const double *src0, const double *src1,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[i];`
			`}`

float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil 13 years ago			`static void vector_fmac_scalar_c(float *dst, const float *src, float mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] += src[i] * mul;`
			`}`

avutil/float_dsp: add vector_dmac_scalar() Signed-off-by: Paul B Mahol <onemda@gmail.com> 8 years ago			`static void vector_dmac_scalar_c(double *dst, const double *src, double mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] += src[i] * mul;`
			`}`

dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil 12 years ago			`static void vector_fmul_scalar_c(float *dst, const float *src, float mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src[i] * mul;`
			`}`

float_dsp: add vector_dmul_scalar() to multiply a vector of doubles Include x86-optimized versions for SSE2 and AVX. 12 years ago			`static void vector_dmul_scalar_c(double *dst, const double *src, double mul,`
			`int len)`
			`{`
			`int i;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src[i] * mul;`
			`}`

lavc: Move vector_fmul_window to AVFloatDSPContext Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`static void vector_fmul_window_c(float *dst, const float *src0,`
			`const float *src1, const float *win, int len)`
			`{`
			`int i, j;`

			`dst += len;`
			`win += len;`
			`src0 += len;`

			`for (i = -len, j = len - 1; i < 0; i++, j--) {`
			`float s0 = src0[i];`
			`float s1 = src1[j];`
			`float wi = win[i];`
			`float wj = win[j];`
			`dst[i] = s0 * wj - s1 * wi;`
			`dst[j] = s0 * wi + s1 * wj;`
			`}`
			`}`

floatdsp: move vector_fmul_add from dsputil to avfloatdsp. 12 years ago			`static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,`
			`const float *src2, int len){`
			`int i;`

			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[i] + src2[i];`
			`}`

floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp. Now, nellymoserenc and aacenc no longer depends on dsputil. Independent of this patch, wmaprodec also does not depend on dsputil, so I removed it from there also. 12 years ago			`static void vector_fmul_reverse_c(float *dst, const float *src0,`
			`const float *src1, int len)`
			`{`
			`int i;`

			`src1 += len-1;`
			`for (i = 0; i < len; i++)`
			`dst[i] = src0[i] * src1[-i];`
			`}`

configure: Remove av_restrict All versions of MSVC that support C11 (namely >= v19.27) also support the restrict keyword, therefore av_restrict is no longer necessary since 75697836b1db3e0f0a3b7061be6be28d00c675a0. Reviewed-by: Martin Storsjö <martin@martin.st> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 8 months ago			`static void butterflies_float_c(float *restrict v1, float *restrict v2,`
floatdsp: move butterflies_float from dsputil to avfloatdsp. This makes wmadec/enc, twinvq and mpegaudiodec (i.e. mp2/mp3) independent of dsputil. 12 years ago			`int len)`
			`{`
			`int i;`

			`for (i = 0; i < len; i++) {`
			`float t = v1[i] - v2[i];`
			`v1[i] += v2[i];`
			`v2[i] = t;`
			`}`
			`}`

floatdsp: move scalarproduct_float from dsputil to avfloatdsp. This makes the aac decoder and all voice codecs independent of dsputil. 12 years ago			`float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)`
			`{`
			`float p = 0.0;`
			`int i;`

			`for (i = 0; i < len; i++)`
			`p += v1[i] * v2[i];`

			`return p;`
			`}`

lavu/float_dsp: add double-precision scalar product The function pointer is appended to the structure for backward binary compatibility. Fortunately, this is allocated by libavutil, not by the user, so increasing the structure size is safe. 6 months ago			`double ff_scalarproduct_double_c(const double *v1, const double *v2,`
			`size_t len)`
			`{`
			`double p = 0.0;`

			`for (size_t i = 0; i < len; i++)`
			`p += v1[i] * v2[i];`

			`return p;`
			`}`

avutil: merge avpriv_float_dsp_init into avpriv_float_dsp_alloc Also replace the last two usages of avpriv_float_dsp_init with avpriv_float_dsp_alloc. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com> 9 years ago			`av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`{`
avutil: merge avpriv_float_dsp_init into avpriv_float_dsp_alloc Also replace the last two usages of avpriv_float_dsp_init with avpriv_float_dsp_alloc. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com> 9 years ago			`AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));`
			`if (!fdsp)`
			`return NULL;`

Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago			`fdsp->vector_fmul = vector_fmul_c;`
avutil: add float_dsp.vector_dmul 6 years ago			`fdsp->vector_dmul = vector_dmul_c;`
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil 13 years ago			`fdsp->vector_fmac_scalar = vector_fmac_scalar_c;`
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil 12 years ago			`fdsp->vector_fmul_scalar = vector_fmul_scalar_c;`
avutil/float_dsp: add vector_dmac_scalar() Signed-off-by: Paul B Mahol <onemda@gmail.com> 8 years ago			`fdsp->vector_dmac_scalar = vector_dmac_scalar_c;`
float_dsp: add vector_dmul_scalar() to multiply a vector of doubles Include x86-optimized versions for SSE2 and AVX. 12 years ago			`fdsp->vector_dmul_scalar = vector_dmul_scalar_c;`
lavc: Move vector_fmul_window to AVFloatDSPContext Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 12 years ago			`fdsp->vector_fmul_window = vector_fmul_window_c;`
floatdsp: move vector_fmul_add from dsputil to avfloatdsp. 12 years ago			`fdsp->vector_fmul_add = vector_fmul_add_c;`
floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp. Now, nellymoserenc and aacenc no longer depends on dsputil. Independent of this patch, wmaprodec also does not depend on dsputil, so I removed it from there also. 12 years ago			`fdsp->vector_fmul_reverse = vector_fmul_reverse_c;`
floatdsp: move butterflies_float from dsputil to avfloatdsp. This makes wmadec/enc, twinvq and mpegaudiodec (i.e. mp2/mp3) independent of dsputil. 12 years ago			`fdsp->butterflies_float = butterflies_float_c;`
floatdsp: move scalarproduct_float from dsputil to avfloatdsp. This makes the aac decoder and all voice codecs independent of dsputil. 12 years ago			`fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;`
lavu/float_dsp: add double-precision scalar product The function pointer is appended to the structure for backward binary compatibility. Fortunately, this is allocated by libavutil, not by the user, so increasing the structure size is safe. 6 months ago			`fdsp->scalarproduct_double = ff_scalarproduct_double_c;`
Add a float DSP framework to libavutil Move vector_fmul() from DSPContext to AVFloatDSPContext. 13 years ago
all: Replace if (ARCH_FOO) checks by #if ARCH_FOO This is more spec-compliant because it does not rely on dead-code elimination by the compiler. Especially MSVC has problems with this, as can be seen in https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/296373.html or https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/297022.html This commit does not eliminate every instance where we rely on dead code elimination: It only tackles branching to the initialization of arch-specific dsp code, not e.g. all uses of CONFIG_ and HAVE_ checks. But maybe it is already enough to compile FFmpeg with MSVC with whole-programm-optimizations enabled (if one does not disable too many components). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2 years ago			`#if ARCH_AARCH64`
			`ff_float_dsp_init_aarch64(fdsp);`
			`#elif ARCH_ARM`
			`ff_float_dsp_init_arm(fdsp);`
			`#elif ARCH_PPC`
			`ff_float_dsp_init_ppc(fdsp, bit_exact);`
lavu/floatdsp: RISC-V V vector_fmul_scalar This is based on existing code from the VLC git tree with two minor changes to account for the different function prototypes. 2 years ago			`#elif ARCH_RISCV`
			`ff_float_dsp_init_riscv(fdsp);`
all: Replace if (ARCH_FOO) checks by #if ARCH_FOO This is more spec-compliant because it does not rely on dead-code elimination by the compiler. Especially MSVC has problems with this, as can be seen in https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/296373.html or https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/297022.html This commit does not eliminate every instance where we rely on dead code elimination: It only tackles branching to the initialization of arch-specific dsp code, not e.g. all uses of CONFIG_ and HAVE_ checks. But maybe it is already enough to compile FFmpeg with MSVC with whole-programm-optimizations enabled (if one does not disable too many components). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2 years ago			`#elif ARCH_X86`
			`ff_float_dsp_init_x86(fdsp);`
			`#elif ARCH_MIPS`
			`ff_float_dsp_init_mips(fdsp);`
			`#endif`
avutil: merge avpriv_float_dsp_init into avpriv_float_dsp_alloc Also replace the last two usages of avpriv_float_dsp_init with avpriv_float_dsp_alloc. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com> 9 years ago			`return fdsp;`
avutil/float_dsp: add avpriv_float_dsp_alloc() Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 10 years ago			`}`