avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
release/5.1
Andreas Rheinhardt 3 years ago
parent 230ea38de1
commit fed07efcde
  1. 20
      libavcodec/x86/lossless_videodsp.asm
  2. 58
      libavcodec/x86/lossless_videodsp_init.c

@ -38,11 +38,11 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
SECTION .text
;------------------------------------------------------------------------------
; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
; void ff_add_median_pred(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
;------------------------------------------------------------------------------
%macro MEDIAN_PRED 0
INIT_XMM sse2
cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
movu m0, [topq]
mova m2, m0
@ -100,14 +100,6 @@ cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
movzx r2d, byte [topq-1]
mov [left_topq], r2d
RET
%endmacro
%if ARCH_X86_32
INIT_MMX mmxext
MEDIAN_PRED
%endif
INIT_XMM sse2
MEDIAN_PRED
%macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
@ -240,10 +232,6 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
REP_RET
%endmacro
%if ARCH_X86_32
INIT_MMX mmx
ADD_BYTES
%endif
INIT_XMM sse2
ADD_BYTES

@ -19,17 +19,12 @@
*/
#include "config.h"
#include "libavutil/x86/asm.h"
#include "../lossless_videodsp.h"
#include "libavutil/x86/cpu.h"
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, ptrdiff_t w,
int *left, int *left_top);
void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, ptrdiff_t w,
int *left, int *left_top);
@ -47,63 +42,10 @@ int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src, u
void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);
void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
/**
 * Median-prediction reconstruction written with CMOV inline assembly.
 *
 * For each index i in [0, w) the loop computes
 *     pred   = median(l, top[i], (l + top[i] - tl) & 0xff)
 *     dst[i] = (pred + diff[i]) & 0xff
 * then carries l = dst[i] and tl = top[i] into the next iteration.
 *
 * @param dst      output bytes, written at indices 0..w-1
 * @param top      bytes read at indices 0..w-1
 * @param diff     bytes added to the prediction, read at indices 0..w-1
 * @param w        number of bytes to process; the loop body executes before
 *                 the counter is tested, so at least one byte is always
 *                 processed (presumably callers guarantee w > 0 - confirm)
 * @param left     in: initial left value (only the low 8 bits are used);
 *                 out: last byte written to dst
 * @param left_top in: initial top-left value (only the low 8 bits are used);
 *                 out: last byte read from top
 */
static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, ptrdiff_t w,
int *left, int *left_top)
{
/* Negative counter running up to 0; the pointers passed to the asm are
 * biased by +w so that (ptr + w) + w2 addresses element 0 first. */
x86_reg w2 = -w;
/* Scratch register: holds top + w for addressing, and is temporarily reused
 * for the gradient value inside the loop. */
x86_reg x;
int l = *left & 0xff;
int tl = *left_top & 0xff;
int t;
/* Operand map: %0 = l, %1 = tl, %2 = t, %3 = x, %4 = w2,
 *              %5 = dst + w, %6 = diff + w, %7 = top + w.
 * The "q" constraint is used for operands accessed through the %b (byte)
 * modifier; per the GCC x86 constraint docs it selects a byte-addressable
 * register.
 * NOTE(review): the asm stores through %5 but lists no "memory" clobber -
 * confirm this is acceptable for the compilers in use. */
__asm__ volatile (
"mov %7, %3 \n" /* x = top + w */
"1: \n"
"movzbl (%3, %4), %2 \n" /* t = top[i], zero-extended */
"mov %2, %k3 \n" /* x = t (x no longer holds the pointer) */
"sub %b1, %b3 \n" /* low byte of x -= tl */
"add %b0, %b3 \n" /* low byte of x += l  ->  x = (l + t - tl) & 0xff */
"mov %2, %1 \n" /* tl = t: saved as next iteration's top-left */
"cmp %0, %2 \n" /* flags from t - l */
"cmovg %0, %2 \n" /* if t > l: t = l        ->  t = min(l, top[i]) */
"cmovg %1, %0 \n" /* same flags: l = old t  ->  l = max(l, top[i]) */
"cmp %k3, %0 \n" /* flags from l - x */
"cmovg %k3, %0 \n" /* l = min(l, x) */
"mov %7, %3 \n" /* restore x = top + w for the next load */
"cmp %2, %0 \n" /* flags from l - t */
"cmovl %2, %0 \n" /* l = max(l, t)  ->  l is now the median */
"add (%6, %4), %b0 \n" /* low byte of l += diff[i] (wraps modulo 256) */
"mov %b0, (%5, %4) \n" /* dst[i] = low byte of l */
"inc %4 \n" /* advance the negative counter toward 0 */
"jl 1b \n" /* loop while the counter is still negative */
: "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
: "r"(dst + w), "r"(diff + w), "rm"(top + w)
);
/* Hand the running state back to the caller. */
*left = l;
*left_top = tl;
}
#endif
void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_CMOV)
c->add_median_pred = add_median_pred_cmov;
#endif
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
c->add_bytes = ff_add_bytes_mmx;
}
if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
/* slower than cmov version on AMD */
if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
c->add_median_pred = ff_add_median_pred_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2;
c->add_median_pred = ff_add_median_pred_sse2;

Loading…
Cancel
Save