FFmpeg/libavcodec/arm/dsputil_init_neon.c

/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdint.h>

#include "libavutil/attributes.h"
#include "libavcodec/avcodec.h"
#include "dsputil_arm.h"

/*
 * Prototypes for the routines that ff_dsputil_init_neon() below installs
 * into DSPContext. Only declarations appear in this file; the definitions
 * live elsewhere (presumably in NEON assembly sources -- confirm).
 */

/* Installed as c->idct, c->idct_put and c->idct_add respectively. */
void ff_simple_idct_neon(int16_t *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);

/* Installed as c->clear_block / c->clear_blocks (8-bit content only). */
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);

/*
 * Installed as c->add_pixels_clamped, c->put_pixels_clamped and
 * c->put_signed_pixels_clamped.
 * NOTE(review): parameter names are omitted here; presumably
 * (block, pixels, line_size) -- confirm against the definitions.
 */
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);

/* Installed as c->vector_clipf and c->vector_clip_int32. */
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
                          int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);

/*
 * Installed as c->scalarproduct_int16 and c->scalarproduct_and_madd_int16.
 * Note that v1 is non-const only in the "and_madd" variant.
 */
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3, int len, int mul);

/**
 * Point the DSPContext function pointers at the ff_*_neon routines
 * declared above.
 *
 * Always installed: the three *_pixels_clamped hooks, vector_clipf,
 * vector_clip_int32, scalarproduct_int16 and scalarproduct_and_madd_int16.
 *
 * Installed only when avctx->bits_per_raw_sample <= 8: clear_block and
 * clear_blocks, plus -- if additionally avctx->lowres is zero and
 * avctx->idct_algo is FF_IDCT_AUTO or FF_IDCT_SIMPLENEON -- the idct,
 * idct_put and idct_add hooks together with the FF_PARTTRANS_IDCT_PERM
 * permutation type.
 *
 * @param c     context whose function pointers are overwritten
 * @param avctx codec context; only lowres, bits_per_raw_sample and
 *              idct_algo are read
 */
av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
    const int is_8bit        = avctx->bits_per_raw_sample <= 8;
    const int neon_idct_algo = avctx->idct_algo == FF_IDCT_AUTO ||
                               avctx->idct_algo == FF_IDCT_SIMPLENEON;

    /* Hooks that do not depend on any codec parameter. */
    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
    c->scalarproduct_int16          = ff_scalarproduct_int16_neon;

    c->vector_clip_int32 = ff_vector_clip_int32_neon;
    c->vector_clipf      = ff_vector_clipf_neon;

    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
    c->put_pixels_clamped        = ff_put_pixels_clamped_neon;
    c->add_pixels_clamped        = ff_add_pixels_clamped_neon;

    /* Everything below is skipped for content deeper than 8 bits. */
    if (is_8bit) {
        c->clear_blocks = ff_clear_blocks_neon;
        c->clear_block  = ff_clear_block_neon;

        /* The IDCT is additionally skipped when lowres decoding is on. */
        if (!avctx->lowres && neon_idct_algo) {
            c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
            c->idct                  = ff_simple_idct_neon;
            c->idct_add              = ff_simple_idct_add_neon;
            c->idct_put              = ff_simple_idct_put_neon;
        }
    }
}
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`/*`
			`* ARM NEON optimised DSP functions`
			`* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include <stdint.h>`

Add av_cold attributes to arch-specific init functions 12 years ago			`#include "libavutil/attributes.h"`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#include "libavcodec/avcodec.h"`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`#include "dsputil_arm.h"`

Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_simple_idct_neon(int16_t *data);`
			`void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);`
			`void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_clear_block_neon(int16_t *block);`
			`void ff_clear_blocks_neon(int16_t *blocks);`
ARM: NEON clear_block[s] Originally committed as revision 23412 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
			`void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
			`void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: NEON optimised vector_clipf Originally committed as revision 20031 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,`
			`int len);`
ARM: NEON optimised vector_clip_int32() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,`
			`int32_t max, unsigned int len);`
ARM: NEON optimised float_to_int16 Originally committed as revision 16352 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
dsputil: remove shift parameter from scalarproduct_int16 There is only one caller, which does not need the shifting. Other use cases are situations where different roundings would be needed. The x86 and neon versions are modified accordingly. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 13 years ago			`int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);`
Add const to some pointer parameters. Patch by Eli Friedman, eli D friedman A gmail Originally committed as revision 23826 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,`
			`const int16_t *v3, int len, int mul);`
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16 Patch by Kostya, minor fixes by me. Originally committed as revision 21958 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Add av_cold attributes to arch-specific init functions 12 years ago			`av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`{`
dsputil: update per-arch init funcs for non-h264 high bit depth Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`const int high_bit_depth = avctx->bits_per_raw_sample > 8;`
Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 14 years ago
lowres2 support. The new lowres support is limited to decoders where lowres decoding is possible in high quality. I was not able to measure any speed difference, but if one is found the 2-3 lines that might affect speed can be made compile time conditional Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 13 years ago			`if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`if (avctx->idct_algo == FF_IDCT_AUTO ||`
			`avctx->idct_algo == FF_IDCT_SIMPLENEON) {`
ARM: cosmetics Originally committed as revision 20166 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->idct_put = ff_simple_idct_put_neon;`
			`c->idct_add = ff_simple_idct_add_neon;`
			`c->idct = ff_simple_idct_neon;`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;`
			`}`
			`}`

Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 14 years ago			`if (!high_bit_depth) {`
ARM: fix indentation in ff_dsputil_init_neon() Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`c->clear_block = ff_clear_block_neon;`
			`c->clear_blocks = ff_clear_blocks_neon;`
Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 14 years ago			`}`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->add_pixels_clamped = ff_add_pixels_clamped_neon;`
ARM: NEON put_pixels_clamped Originally committed as revision 18712 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->put_pixels_clamped = ff_put_pixels_clamped_neon;`
ARM: NEON optimized put_signed_pixels_clamped Originally committed as revision 18333 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;`
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: cosmetics Originally committed as revision 20166 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->vector_clipf = ff_vector_clipf_neon;`
ARM: NEON optimised vector_clip_int32() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`c->vector_clip_int32 = ff_vector_clip_int32_neon;`
ARM: NEON optimisations for some dsputil functions NEON versions of the following functions are added: vector_fmul_scalar vector_fmul_sv_scalar sv_fmul_scalar butterflies_float Originally committed as revision 19957 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16 Patch by Kostya, minor fixes by me. Originally committed as revision 21958 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->scalarproduct_int16 = ff_scalarproduct_int16_neon;`
			`c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`}`