diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 2f0354a2c8..28649522ff 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -67,6 +67,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o +OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \ x86/vp9dsp_init_10bpp.o \ @@ -169,6 +170,7 @@ YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o +YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \ x86/vp9intrapred_16bpp.o \ diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index 82fb8934af..ce5d7a4e28 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -175,53 +175,6 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_X2 -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 -cglobal put_no_rnd_pixels8_x2_exact, 4,5 - lea r4, [r2*3] - pcmpeqb m6, m6 -.loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, [r1+1] - mova m3, [r1+r2+1] - pxor m0, m6 - pxor m2, m6 - pxor m1, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0], m0 - mova [r0+r2], m2 - mova m0, [r1+r2*2] - mova m1, [r1+r2*2+1] - mova m2, [r1+r4] - mova m3, [r1+r4+1] - pxor m0, m6 - pxor m1, m6 - pxor m2, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0+r2*2], m0 - mova [r0+r4], m2 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2_EXACT - - ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro PUT_PIXELS8_Y2 0 %if cpuflag(sse2) @@ -300,48 +253,6 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_Y2 -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 -cglobal put_no_rnd_pixels8_y2_exact, 4,5 - lea r4, [r2*3] - mova m0, [r1] - pcmpeqb m6, m6 - add r1, r2 - pxor m0, m6 -.loop: - mova m1, [r1] - mova m2, [r1+r2] - pxor m1, m6 - pxor m2, m6 - PAVGB m0, m1 - PAVGB m1, m2 - pxor m0, m6 - pxor m1, m6 - mova [r0], m0 - mova [r0+r2], m1 - mova m1, [r1+r2*2] - mova m0, [r1+r4] - pxor m1, m6 - pxor m0, m6 - PAVGB m2, m1 - PAVGB m1, m0 - pxor m2, m6 - pxor m1, m6 - mova [r0+r2*2], m2 - mova [r0+r4], m1 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2_EXACT - - ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro AVG_PIXELS8 0 cglobal avg_pixels8, 4,5 diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h index 5fae990a4f..bf97029b57 100644 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@ -22,6 +22,8 @@ #include #include +#include "libavcodec/hpeldsp.h" + void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); @@ -50,4 +52,6 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags); + #endif /* AVCODEC_X86_HPELDSP_H */ diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index f1ba4be6a2..e8da184ae6 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -51,12 +51,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, @@ -65,12 +59,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, @@ -242,11 +230,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; - } #endif /* HAVE_MMXEXT_EXTERNAL */ } @@ -278,11 +261,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow; } - - if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; - } #endif /* HAVE_AMD3DNOW_EXTERNAL */ } @@ -332,4 +310,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) if (EXTERNAL_SSSE3(cpu_flags)) hpeldsp_init_ssse3(c, flags, cpu_flags); + + if (CONFIG_VP3_DECODER) + ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags); } diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm new file mode 100644 index 0000000000..cba96d06cb --- /dev/null +++ b/libavcodec/x86/hpeldsp_vp3.asm @@ -0,0 +1,111 @@ +;****************************************************************************** +;* SIMD-optimized halfpel functions for VP3 +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 +cglobal put_no_rnd_pixels8_x2_exact, 4,5 + lea r4, [r2*3] + pcmpeqb m6, m6 +.loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, [r1+1] + mova m3, [r1+r2+1] + pxor m0, m6 + pxor m2, m6 + pxor m1, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0], m0 + mova [r0+r2], m2 + mova m0, [r1+r2*2] + mova m1, [r1+r2*2+1] + mova m2, [r1+r4] + mova m3, [r1+r4+1] + pxor m0, m6 + pxor m1, m6 + pxor m2, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0+r2*2], m0 + mova [r0+r4], m2 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_X2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_X2_EXACT + + +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 +cglobal put_no_rnd_pixels8_y2_exact, 4,5 + lea r4, [r2*3] + mova m0, [r1] + pcmpeqb m6, m6 + add r1, r2 + pxor m0, m6 +.loop: + mova m1, [r1] + mova m2, [r1+r2] + pxor m1, m6 + pxor m2, m6 + PAVGB m0, m1 + PAVGB m1, m2 + pxor m0, m6 + pxor m1, m6 + mova [r0], m0 + mova [r0+r2], m1 + mova m1, [r1+r2*2] + mova m0, [r1+r4] + pxor m1, m6 + pxor m0, m6 + PAVGB m2, m1 + PAVGB m1, m0 + pxor m2, m6 + pxor m1, m6 + mova [r0+r2*2], m2 + mova [r0+r4], m1 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_Y2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_Y2_EXACT diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c new file mode 100644 index 0000000000..5979f4123c --- /dev/null +++ b/libavcodec/x86/hpeldsp_vp3_init.c @@ -0,0 +1,56 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" + +#include "libavcodec/avcodec.h" +#include "libavcodec/hpeldsp.h" + +#include "hpeldsp.h" + +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); + +av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) +{ + if (EXTERNAL_AMD3DNOW(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; + } + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + if (flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; + } + } +}