|
|
|
@ -27,7 +27,8 @@ cextern pw_64 |
|
|
|
|
|
|
|
|
|
SECTION .text |
|
|
|
|
|
|
|
|
|
%macro DIAG4_MMX 6 |
|
|
|
|
%macro DIAG4 6 |
|
|
|
|
%if mmsize == 8 |
|
|
|
|
movq m0, [%1+%2] |
|
|
|
|
movq m1, [%1+%3] |
|
|
|
|
movq m3, m0 |
|
|
|
@ -64,9 +65,7 @@ SECTION .text |
|
|
|
|
psraw m3, 7 |
|
|
|
|
packuswb m0, m3 |
|
|
|
|
movq [%6], m0 |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
%macro DIAG4_SSE2 6 |
|
|
|
|
%else ; mmsize == 16 |
|
|
|
|
movq m0, [%1+%2] |
|
|
|
|
movq m1, [%1+%3] |
|
|
|
|
punpcklbw m0, m7 |
|
|
|
@ -86,9 +85,11 @@ SECTION .text |
|
|
|
|
psraw m0, 7 |
|
|
|
|
packuswb m0, m0 |
|
|
|
|
movq [%6], m0 |
|
|
|
|
%endif ; mmsize == 8/16 |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
%macro SPLAT4REGS_MMX 0 |
|
|
|
|
%macro SPLAT4REGS 0 |
|
|
|
|
%if mmsize == 8 |
|
|
|
|
movq m5, m3 |
|
|
|
|
punpcklwd m3, m3 |
|
|
|
|
movq m4, m3 |
|
|
|
@ -102,9 +103,7 @@ SECTION .text |
|
|
|
|
movq [rsp+8*12], m4 |
|
|
|
|
movq [rsp+8*13], m5 |
|
|
|
|
movq [rsp+8*14], m2 |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
%macro SPLAT4REGS_SSE2 0 |
|
|
|
|
%else ; mmsize == 16 |
|
|
|
|
pshuflw m4, m3, 0x0 |
|
|
|
|
pshuflw m5, m3, 0x55 |
|
|
|
|
pshuflw m6, m3, 0xAA |
|
|
|
@ -113,15 +112,16 @@ SECTION .text |
|
|
|
|
punpcklqdq m5, m5 |
|
|
|
|
punpcklqdq m6, m6 |
|
|
|
|
punpcklqdq m3, m3 |
|
|
|
|
%endif ; mmsize == 8/16 |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
%macro vp6_filter_diag4 2 |
|
|
|
|
%macro vp6_filter_diag4 0 |
|
|
|
|
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride, |
|
|
|
|
; const int16_t h_weight[4], const int16_t v_weights[4]) |
|
|
|
|
cglobal vp6_filter_diag4_%1, 5, 7, %2 |
|
|
|
|
cglobal vp6_filter_diag4, 5, 7, 8 |
|
|
|
|
mov r5, rsp ; backup stack pointer |
|
|
|
|
and rsp, ~(mmsize-1) ; align stack |
|
|
|
|
%ifidn %1, sse2 |
|
|
|
|
%if mmsize == 16 |
|
|
|
|
sub rsp, 8*11 |
|
|
|
|
%else |
|
|
|
|
sub rsp, 8*15 |
|
|
|
@ -162,12 +162,8 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2 |
|
|
|
|
RET |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
INIT_MMX |
|
|
|
|
%define DIAG4 DIAG4_MMX |
|
|
|
|
%define SPLAT4REGS SPLAT4REGS_MMX |
|
|
|
|
vp6_filter_diag4 mmx, 0 |
|
|
|
|
INIT_MMX mmx |
|
|
|
|
vp6_filter_diag4 |
|
|
|
|
|
|
|
|
|
INIT_XMM |
|
|
|
|
%define DIAG4 DIAG4_SSE2 |
|
|
|
|
%define SPLAT4REGS SPLAT4REGS_SSE2 |
|
|
|
|
vp6_filter_diag4 sse2, 8 |
|
|
|
|
INIT_XMM sse2 |
|
|
|
|
vp6_filter_diag4 |
|
|
|
|