x86/intreadwrite: use intrinsics instead of inline asm for AV_COPY128

This has the benefit of removing any SSE -> AVX penalty that may happen when
the compiler emits VEX encoded instructions.

Signed-off-by: James Almer <jamrial@gmail.com>
release/7.1
James Almer 6 months ago
parent 4a04cca69a
commit bd1bcb07e0
  1. 5
      configure
  2. 20
      libavutil/x86/intreadwrite.h

5
configure vendored

@ -2314,6 +2314,7 @@ HEADERS_LIST="
INTRINSICS_LIST="
intrinsics_neon
intrinsics_sse
intrinsics_sse2
"
@ -2744,7 +2745,8 @@ armv6t2_deps="arm"
armv8_deps="aarch64"
neon_deps_any="aarch64 arm"
intrinsics_neon_deps="neon"
intrinsics_sse_deps="sse"
intrinsics_sse2_deps="sse2 intrinsics_sse"
vfp_deps="arm"
vfpv3_deps="vfp"
setend_deps="arm"
@ -6446,6 +6448,7 @@ elif enabled loongarch; then
fi
check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)"
check_cc intrinsics_sse immintrin.h "__m128 test = _mm_setzero_ps()"
check_cc intrinsics_sse2 emmintrin.h "__m128i test = _mm_setzero_si128()"
check_ldflags -Wl,--as-needed

@ -22,29 +22,25 @@
#define AVUTIL_X86_INTREADWRITE_H
#include <stdint.h>
#if HAVE_INTRINSICS_SSE
#include <immintrin.h>
#endif
#if HAVE_INTRINSICS_SSE2
#include <emmintrin.h>
#endif
#include "config.h"
#include "libavutil/attributes.h"
#if HAVE_MMX
#if HAVE_INTRINSICS_SSE

#define AV_COPY128 AV_COPY128
/**
 * Copy 16 bytes from s to d.
 *
 * Both d and s must be 16-byte aligned: _mm_load_ps/_mm_store_ps are the
 * aligned load/store forms (they map to movaps/vmovaps).
 *
 * Implemented with intrinsics instead of inline asm so the compiler can
 * emit VEX-encoded instructions when targeting AVX, avoiding any
 * SSE -> AVX transition penalty (see commit message).
 */
static av_always_inline void AV_COPY128(void *d, const void *s)
{
    __m128 tmp = _mm_load_ps(s);
    _mm_store_ps(d, tmp);
}

#endif /* HAVE_INTRINSICS_SSE */
#if HAVE_INTRINSICS_SSE2
@ -57,6 +53,4 @@ static av_always_inline void AV_ZERO128(void *d)
#endif /* HAVE_INTRINSICS_SSE2 */
#endif /* HAVE_MMX */
#endif /* AVUTIL_X86_INTREADWRITE_H */

Loading…
Cancel
Save