/* FFmpeg/libavcodec/arm/dsputil_init_neon.c */

/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdint.h>

#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "dsputil_arm.h"

/*
 * Prototypes of the NEON routines that ff_dsputil_init_neon() installs into
 * a DSPContext.  Only declarations appear here; the definitions are not in
 * this file (presumably NEON assembly -- confirm against the build files).
 */

/* IDCT entry points, installed as c->idct, c->idct_put and c->idct_add. */
void ff_simple_idct_neon(int16_t *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);

/* Installed as c->clear_block and c->clear_blocks. */
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);

/*
 * "put" pixel routines.  The pixels16 variants fill put_pixels_tab[0][0..3]
 * and the pixels8 variants fill put_pixels_tab[1][0..3], in the order
 * plain, x2, y2, xy2.  The _no_rnd variants fill indices 1..3 of
 * put_no_rnd_pixels_tab; index 0 of that table reuses the plain
 * ff_put_pixels16_neon / ff_put_pixels8_neon.
 */
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

/*
 * "avg" pixel routines, laid out in avg_pixels_tab the same way as the put
 * routines.  Only pixels16 _no_rnd variants are declared; they fill
 * avg_no_rnd_pixels_tab[1..3], while index 0 reuses ff_avg_pixels16_neon.
 */
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

/* Clamped pixel routines; installed without any bit-depth check. */
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);

/*
 * H.264 chroma MC "put" routines, stored in put_h264_chroma_pixels_tab[0..2]
 * in the order mc8, mc4, mc2.  Installed only under CONFIG_H264_DECODER.
 */
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);

/* H.264 chroma MC "avg" routines, stored in avg_h264_chroma_pixels_tab[0..2]. */
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);

/* Installed as c->vector_clipf and c->vector_clip_int32. */
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
                          int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);

/* Installed as c->scalarproduct_int16 and c->scalarproduct_and_madd_int16. */
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3, int len, int mul);

/* Installed as c->apply_window_int16. */
void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
                                const int16_t *window, unsigned n);

/*
 * Point the DSPContext function pointers at their NEON implementations.
 *
 * c     - context whose function pointers are overwritten
 * avctx - supplies bits_per_raw_sample (bit-depth gate) and idct_algo
 *         (IDCT selection)
 *
 * The clamped-pixel, vector-clip, scalar-product and window routines are
 * installed unconditionally.  The IDCT, H.264 chroma MC, block-clearing and
 * pixel put/avg routines are installed only when bits_per_raw_sample is at
 * most 8.
 */
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
    const int depth_is_8bit = avctx->bits_per_raw_sample <= 8;

    /* Entries that do not depend on the sample bit depth. */
    c->vector_clipf                 = ff_vector_clipf_neon;
    c->vector_clip_int32            = ff_vector_clip_int32_neon;
    c->scalarproduct_int16          = ff_scalarproduct_int16_neon;
    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
    c->apply_window_int16           = ff_apply_window_int16_neon;
    c->add_pixels_clamped           = ff_add_pixels_clamped_neon;
    c->put_pixels_clamped           = ff_put_pixels_clamped_neon;
    c->put_signed_pixels_clamped    = ff_put_signed_pixels_clamped_neon;

    /* NEON IDCT: only on explicit request or when the choice is left open. */
    if (depth_is_8bit &&
        (avctx->idct_algo == FF_IDCT_AUTO ||
         avctx->idct_algo == FF_IDCT_SIMPLENEON)) {
        c->idct                  = ff_simple_idct_neon;
        c->idct_put              = ff_simple_idct_put_neon;
        c->idct_add              = ff_simple_idct_add_neon;
        c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
    }

    /* The build-time constant stays first so the branch can be dropped
     * entirely when the H.264 decoder is not configured. */
    if (CONFIG_H264_DECODER && depth_is_8bit) {
        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;

        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
    }

    /* Everything below is 8-bit only. */
    if (!depth_is_8bit)
        return;

    c->clear_blocks = ff_clear_blocks_neon;
    c->clear_block  = ff_clear_block_neon;

    /* Row 0: 16-pixel-wide variants; columns are plain, x2, y2, xy2. */
    c->put_pixels_tab[0][0]        = ff_put_pixels16_neon;
    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_neon;
    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_neon;
    c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_neon;
    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_neon;
    c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_neon;
    c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_neon;
    c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_neon;

    /* Row 1: 8-pixel-wide variants. */
    c->put_pixels_tab[1][0]        = ff_put_pixels8_neon;
    c->put_pixels_tab[1][1]        = ff_put_pixels8_x2_neon;
    c->put_pixels_tab[1][2]        = ff_put_pixels8_y2_neon;
    c->put_pixels_tab[1][3]        = ff_put_pixels8_xy2_neon;
    c->avg_pixels_tab[1][0]        = ff_avg_pixels8_neon;
    c->avg_pixels_tab[1][1]        = ff_avg_pixels8_x2_neon;
    c->avg_pixels_tab[1][2]        = ff_avg_pixels8_y2_neon;
    c->avg_pixels_tab[1][3]        = ff_avg_pixels8_xy2_neon;

    /* No-rounding tables: column 0 reuses the plain routine. */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;

    c->avg_no_rnd_pixels_tab[0]    = ff_avg_pixels16_neon;
    c->avg_no_rnd_pixels_tab[1]    = ff_avg_pixels16_x2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[2]    = ff_avg_pixels16_y2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[3]    = ff_avg_pixels16_xy2_no_rnd_neon;
}
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`/*`
			`* ARM NEON optimised DSP functions`
			`* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>`
			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`* This file is part of Libav.`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`* Libav is free software; you can redistribute it and/or`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`* Libav is distributed in the hope that it will be useful,`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`* License along with Libav; if not, write to the Free Software`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include <stdint.h>`

			`#include "libavcodec/avcodec.h"`
			`#include "libavcodec/dsputil.h"`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`#include "dsputil_arm.h"`

Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_simple_idct_neon(int16_t *data);`
			`void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);`
			`void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_clear_block_neon(int16_t *block);`
			`void ff_clear_blocks_neon(int16_t *blocks);`
ARM: NEON clear_block[s] Originally committed as revision 23412 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Use ptrdiff_t instead of int for {avg, put}_pixels line_size parameter. This avoids SIMD-optimized functions having to sign-extend their line size argument manually to be able to do pointer arithmetic. 12 years ago			`void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
Use ptrdiff_t instead of int for {avg, put}_pixels line_size parameter. This avoids SIMD-optimized functions having to sign-extend their line size argument manually to be able to do pointer arithmetic. 12 years ago			`void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
			`void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 12 years ago			`void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
			`void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
			`void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);`
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: NEON optimised {put,avg}_h264_chroma_mc[48] Originally committed as revision 16147 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);`
			`void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);`
ARM: NEON 2xN chroma MC Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);`
ARM: NEON optimised {put,avg}_h264_chroma_mc[48] Originally committed as revision 16147 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
			`void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);`
			`void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);`
ARM: NEON 2xN chroma MC Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);`
ARM: NEON optimised {put,avg}_h264_chroma_mc[48] Originally committed as revision 16147 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: NEON optimised vector_clipf Originally committed as revision 20031 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,`
			`int len);`
ARM: NEON optimised vector_clip_int32() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,`
			`int32_t max, unsigned int len);`
ARM: NEON optimised float_to_int16 Originally committed as revision 16352 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
dsputil: remove shift parameter from scalarproduct_int16 There is only one caller, which does not need the shifting. Other use cases are situations where different roundings would be needed. The x86 and neon versions are modified accordingly. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 13 years ago			`int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);`
Add const to some pointer parameters. Patch by Eli Friedman, eli D friedman A gmail Originally committed as revision 23826 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,`
			`const int16_t *v3, int len, int mul);`
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16 Patch by Kostya, minor fixes by me. Originally committed as revision 21958 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
ARM: NEON optimised apply_window_int16() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,`
ARM: fix ff_apply_window_int16_neon() prototype The length argument should be unsigned. No change in code. Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`const int16_t *window, unsigned n);`
ARM: NEON optimised apply_window_int16() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)`
			`{`
dsputil: update per-arch init funcs for non-h264 high bit depth Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`const int high_bit_depth = avctx->bits_per_raw_sample > 8;`
Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 14 years ago
Remove lowres video decoding This feature is complex, of questionable utility, and slows down normal decoding. Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`if (avctx->bits_per_raw_sample <= 8) {`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`if (avctx->idct_algo == FF_IDCT_AUTO ||`
			`avctx->idct_algo == FF_IDCT_SIMPLENEON) {`
ARM: cosmetics Originally committed as revision 20166 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->idct_put = ff_simple_idct_put_neon;`
			`c->idct_add = ff_simple_idct_add_neon;`
			`c->idct = ff_simple_idct_neon;`
ARM: clean up dsputil initialisation - Move v5 and v6 initialisation to separate files. - Move NEON IDCT selection to ff_dsputil_init_neon() Originally committed as revision 20163 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;`
			`}`
			`}`

Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 14 years ago			`if (!high_bit_depth) {`
ARM: fix indentation in ff_dsputil_init_neon() Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`c->clear_block = ff_clear_block_neon;`
			`c->clear_blocks = ff_clear_blocks_neon;`

			`c->put_pixels_tab[0][0] = ff_put_pixels16_neon;`
			`c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;`
			`c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;`
			`c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;`
			`c->put_pixels_tab[1][0] = ff_put_pixels8_neon;`
			`c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;`
			`c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;`
			`c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;`

			`c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;`
			`c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;`
			`c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;`
			`c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;`
			`c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;`
			`c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;`
			`c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;`
			`c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;`

			`c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;`
			`c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;`
			`c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;`
			`c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;`
			`c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;`
			`c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;`
			`c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;`
			`c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;`

dsputil: remove one array dimension from avg_no_rnd_pixels_tab. 12 years ago			`c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;`
			`c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;`
			`c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;`
			`c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;`
Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder. This patch lets e.g. dsputil_init chose dsp functions with respect to the bit depth to decode. The naming scheme of bit depth dependent functions is <base name>_<bit depth>[_<prefix>] (i.e. the old clear_blocks_c is now named clear_blocks_8_c). Note: Some of the functions for high bit depth is not dependent on the bit depth, but only on the pixel size. This leaves some room for optimizing binary size. Preparatory patch for high bit depth h264 decoding support. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 14 years ago			`}`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->add_pixels_clamped = ff_add_pixels_clamped_neon;`
ARM: NEON put_pixels_clamped Originally committed as revision 18712 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->put_pixels_clamped = ff_put_pixels_clamped_neon;`
ARM: NEON optimized put_signed_pixels_clamped Originally committed as revision 18333 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;`
ARM: NEON optimised add_pixels_clamped Based on patch by David Conrad. Originally committed as revision 18332 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: fix indentation in ff_dsputil_init_neon() Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`if (CONFIG_H264_DECODER && !high_bit_depth) {`
cosmetics: reindent/prettyprint after last commit Originally committed as revision 19495 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;`
			`c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;`
ARM: NEON 2xN chroma MC Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;`
cosmetics: reindent/prettyprint after last commit Originally committed as revision 19495 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
			`c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;`
			`c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;`
ARM: NEON 2xN chroma MC Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;`
Only compile in NEON optimizations for H.264 when the H.264 decoder is enabled. Originally committed as revision 19494 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`}`
ARM: NEON optimised float_to_int16 Originally committed as revision 16352 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
ARM: cosmetics Originally committed as revision 20166 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->vector_clipf = ff_vector_clipf_neon;`
ARM: NEON optimised vector_clip_int32() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago			`c->vector_clip_int32 = ff_vector_clip_int32_neon;`
ARM: NEON optimisations for some dsputil functions NEON versions of the following functions are added: vector_fmul_scalar vector_fmul_sv_scalar sv_fmul_scalar butterflies_float Originally committed as revision 19957 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
ARM: NEON scalarproduct_int16 and scalarproduct_and_madd_int16 Patch by Kostya, minor fixes by me. Originally committed as revision 21958 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`c->scalarproduct_int16 = ff_scalarproduct_int16_neon;`
			`c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;`
ARM: NEON optimised apply_window_int16() Signed-off-by: Mans Rullgard <mans@mansr.com> 14 years ago
			`c->apply_window_int16 = ff_apply_window_int16_neon;`
ARM: NEON optimised put_pixels functions Originally committed as revision 16145 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`}`