swscale/x86/rgb2rgb: remove mmxext version of shuffle_bytes_2103

Signed-off-by: James Almer <jamrial@gmail.com>
release/7.1
James Almer 7 months ago
parent c578bb9864
commit e8cef5e152
  1. 4
      libswscale/x86/rgb2rgb.c
  2. 64
      libswscale/x86/rgb_2_rgb.asm

@ -2343,7 +2343,6 @@ static void interleave_bytes_sse2(const uint8_t *src1, const uint8_t *src2, uint
#endif /* HAVE_INLINE_ASM */
void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
@ -2406,9 +2405,6 @@ av_cold void rgb2rgb_init_x86(void)
interleaveBytes = interleave_bytes_sse2;
#endif /* HAVE_INLINE_ASM */
if (EXTERNAL_MMXEXT(cpu_flags)) {
shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
}
#if HAVE_SSE2_EXTERNAL
if (EXTERNAL_SSE2(cpu_flags)) {
#if ARCH_X86_64

@ -25,7 +25,6 @@
SECTION_RODATA
; Eight 16-bit words, each 0x00FF: a mask that keeps the even-numbered bytes.
; Loaded into m6 by the mmxext shuffle_bytes_2103 routine.
pb_mask_shuffle2103_mmx times 8 dw 255
; Byte-index tables, 16 entries each. Within every group of four the indices
; follow the order given in the label (e.g. 2,1,0,3 then 6,5,4,7 ...).
; NOTE(review): presumably pshufb control masks for the ssse3 variants - their
; use is not visible in this chunk; confirm.
pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15
pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
@ -47,69 +46,6 @@ SECTION .text
%endif
%endmacro
;------------------------------------------------------------------------------
; shuffle_bytes_2103_mmxext (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
; Copies src to dst while swapping bytes 0 and 2 of every 4-byte group
; (output byte order 2,1,0,3); bytes 1 and 3 stay in place.
; cglobal operands (x86inc convention): 3 arguments (src, dst, w = src_size),
; 5 general-purpose registers (adds tmp and x), 8 SIMD registers requested.
; Layout of the work: the first (src_size & (mmsize*2-4)) bytes are handled
; 4 at a time by .loop_scalar, the rest 16 at a time by .loop_simd.
; NOTE(review): the size is only ever consumed in 4-byte steps, so src_size
; is presumably always a multiple of 4 - otherwise the loops run past the end
; of the buffers; confirm against callers.
; NOTE(review): with src_size == 0 the `je` below enters .loop_simd directly
; and one 16-byte iteration executes - callers presumably never pass 0; confirm.
INIT_MMX mmxext
cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
; m6 = 0x00FF in every word -> selects even bytes (0, 2, 4, 6)
mova m6, [pb_mask_shuffle2103_mmx]
; m7 = m6 shifted left by one byte -> selects odd bytes (1, 3, 5, 7)
mova m7, m6
psllq m7, 8
; x86inc helper; presumably sign-extends the 32-bit size to pointer width
; where the d and q register names differ - confirm in x86inc.asm
movsxdifnidn wq, wd
; x keeps a copy of the size for the scalar-loop count below
mov xq, wq
; Point src/dst at the end of the buffers and index with a negative offset
; that counts up towards zero, so the loop test is just the sign of w.
add srcq, wq
add dstq, wq
neg wq
; x = number of leading bytes (0, 4, 8 or 12) to handle in the scalar loop,
; i.e. the part of the size that is not a multiple of 16; skip it when zero
and xq, mmsize*2 -4
je .loop_simd
.loop_scalar:
; one 4-byte group per iteration: dst[0]=src[2], dst[1]=src[1],
; dst[2]=src[0], dst[3]=src[3]
mov tmpb, [srcq + wq + 2]
mov [dstq+wq + 0], tmpb
mov tmpb, [srcq + wq + 1]
mov [dstq+wq + 1], tmpb
mov tmpb, [srcq + wq + 0]
mov [dstq+wq + 2], tmpb
mov tmpb, [srcq + wq + 3]
mov [dstq+wq + 3], tmpb
add wq, 4
sub xq, 4
jg .loop_scalar
; if the scalar loop already consumed everything (src_size < mmsize * 2),
; w has reached zero and there is nothing left for the SIMD loop
cmp wq, 0
jge .end
.loop_simd:
; load 16 bytes as two 8-byte halves
movu m0, [srcq+wq]
movu m1, [srcq+wq+8]
; 177 = 0b10110001: swap the two 16-bit words inside each 32-bit group,
; which moves byte 2 into byte position 0 and byte 0 into position 2
pshufw m3, m0, 177
pshufw m5, m1, 177
; keep odd bytes (1, 3) from the original data ...
pand m0, m7
; ... and even bytes (now the swapped 2, 0) from the word-swapped copy
pand m3, m6
pand m1, m7
pand m5, m6
; merge the two halves of each result: byte order becomes 2,1,0,3
por m0, m3
por m1, m5
movu [dstq+wq], m0
movu [dstq+wq + 8], m1
; advance by 16 bytes; keep looping while the offset is still negative
add wq, mmsize*2
jl .loop_simd
.end:
; leave MMX state so that later x87 floating-point code works
emms
RET
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------

Loading…
Cancel
Save