|
|
|
@ -246,7 +246,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 |
|
|
|
|
IDCT8_ADD_SSE r0, r1, r2, r3 |
|
|
|
|
RET |
|
|
|
|
|
|
|
|
|
%macro DC_ADD_MMX2_INIT 2-3 |
|
|
|
|
%macro DC_ADD_MMXEXT_INIT 2-3 |
|
|
|
|
%if %0 == 2 |
|
|
|
|
movsx %1, word [%1] |
|
|
|
|
add %1, 32 |
|
|
|
@ -266,7 +266,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 |
|
|
|
|
packuswb m1, m1 |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
%macro DC_ADD_MMX2_OP 4 |
|
|
|
|
%macro DC_ADD_MMXEXT_OP 4 |
|
|
|
|
%1 m2, [%2 ] |
|
|
|
|
%1 m3, [%2+%3 ] |
|
|
|
|
%1 m4, [%2+%3*2] |
|
|
|
@ -288,16 +288,16 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 |
|
|
|
|
INIT_MMX |
|
|
|
|
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) |
|
|
|
|
cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0 |
|
|
|
|
DC_ADD_MMX2_INIT r1, r2 |
|
|
|
|
DC_ADD_MMX2_OP movh, r0, r2, r1 |
|
|
|
|
DC_ADD_MMXEXT_INIT r1, r2 |
|
|
|
|
DC_ADD_MMXEXT_OP movh, r0, r2, r1 |
|
|
|
|
RET |
|
|
|
|
|
|
|
|
|
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) |
|
|
|
|
cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0 |
|
|
|
|
DC_ADD_MMX2_INIT r1, r2 |
|
|
|
|
DC_ADD_MMX2_OP mova, r0, r2, r1 |
|
|
|
|
DC_ADD_MMXEXT_INIT r1, r2 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, r0, r2, r1 |
|
|
|
|
lea r0, [r0+r2*4] |
|
|
|
|
DC_ADD_MMX2_OP mova, r0, r2, r1 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, r0, r2, r1 |
|
|
|
|
RET |
|
|
|
|
|
|
|
|
|
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, |
|
|
|
@ -371,14 +371,14 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s |
|
|
|
|
movsx r6, word [r2] |
|
|
|
|
test r6, r6 |
|
|
|
|
jz .no_dc |
|
|
|
|
DC_ADD_MMX2_INIT r2, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_INIT r2, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
%define dst2q r1 |
|
|
|
|
%define dst2d r1d |
|
|
|
|
%endif |
|
|
|
|
mov dst2d, dword [r1+r5*4] |
|
|
|
|
lea dst2q, [r0+dst2q] |
|
|
|
|
DC_ADD_MMX2_OP movh, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
mov r1, r1m |
|
|
|
|
%endif |
|
|
|
@ -445,14 +445,14 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo |
|
|
|
|
movsx r6, word [r2] |
|
|
|
|
test r6, r6 |
|
|
|
|
jz .skipblock |
|
|
|
|
DC_ADD_MMX2_INIT r2, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_INIT r2, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
%define dst2q r1 |
|
|
|
|
%define dst2d r1d |
|
|
|
|
%endif |
|
|
|
|
mov dst2d, dword [r1+r5*4] |
|
|
|
|
add dst2q, r0 |
|
|
|
|
DC_ADD_MMX2_OP movh, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
mov r1, r1m |
|
|
|
|
%endif |
|
|
|
@ -483,16 +483,16 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s |
|
|
|
|
movsx r6, word [r2] |
|
|
|
|
test r6, r6 |
|
|
|
|
jz .no_dc |
|
|
|
|
DC_ADD_MMX2_INIT r2, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_INIT r2, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
%define dst2q r1 |
|
|
|
|
%define dst2d r1d |
|
|
|
|
%endif |
|
|
|
|
mov dst2d, dword [r1+r5*4] |
|
|
|
|
lea dst2q, [r0+dst2q] |
|
|
|
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 |
|
|
|
|
lea dst2q, [dst2q+r3*4] |
|
|
|
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
mov r1, r1m |
|
|
|
|
%endif |
|
|
|
@ -541,16 +541,16 @@ cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block, |
|
|
|
|
test r6, r6 |
|
|
|
|
jz .no_dc |
|
|
|
|
INIT_MMX |
|
|
|
|
DC_ADD_MMX2_INIT r2, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_INIT r2, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
%define dst2q r1 |
|
|
|
|
%define dst2d r1d |
|
|
|
|
%endif |
|
|
|
|
mov dst2d, dword [r1+r5*4] |
|
|
|
|
add dst2q, r0 |
|
|
|
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 |
|
|
|
|
lea dst2q, [dst2q+r3*4] |
|
|
|
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 |
|
|
|
|
%if ARCH_X86_64 == 0 |
|
|
|
|
mov r1, r1m |
|
|
|
|
%endif |
|
|
|
@ -644,7 +644,7 @@ h264_idct_add8_mmx2_plane: |
|
|
|
|
movsx r6, word [r2] |
|
|
|
|
test r6, r6 |
|
|
|
|
jz .skipblock |
|
|
|
|
DC_ADD_MMX2_INIT r2, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_INIT r2, r3, r6 |
|
|
|
|
%if ARCH_X86_64 |
|
|
|
|
mov r0d, dword [r1+r5*4] |
|
|
|
|
add r0, [dst2q] |
|
|
|
@ -653,7 +653,7 @@ h264_idct_add8_mmx2_plane: |
|
|
|
|
mov r0, [r0] |
|
|
|
|
add r0, dword [r1+r5*4] |
|
|
|
|
%endif |
|
|
|
|
DC_ADD_MMX2_OP movh, r0, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP movh, r0, r3, r6 |
|
|
|
|
.skipblock: |
|
|
|
|
inc r5 |
|
|
|
|
add r2, 32 |
|
|
|
@ -697,7 +697,7 @@ h264_idct_dc_add8_mmx2: |
|
|
|
|
pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D |
|
|
|
|
punpcklwd m0, m0 ; d d d d D D D D |
|
|
|
|
lea r6, [r3*3] |
|
|
|
|
DC_ADD_MMX2_OP movq, r0, r3, r6 |
|
|
|
|
DC_ADD_MMXEXT_OP movq, r0, r3, r6 |
|
|
|
|
ret |
|
|
|
|
|
|
|
|
|
ALIGN 16 |
|
|
|
|