|
|
@ -103,15 +103,8 @@ cglobal pred16x16_horizontal, 2,3 |
|
|
|
%else |
|
|
|
%else |
|
|
|
punpcklbw m0, m0 |
|
|
|
punpcklbw m0, m0 |
|
|
|
punpcklbw m1, m1 |
|
|
|
punpcklbw m1, m1 |
|
|
|
%if cpuflag(mmx2) |
|
|
|
SPLATW m0, m0, 3 |
|
|
|
pshufw m0, m0, 0xff |
|
|
|
SPLATW m1, m1, 3 |
|
|
|
pshufw m1, m1, 0xff |
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
punpckhwd m0, m0 |
|
|
|
|
|
|
|
punpckhwd m1, m1 |
|
|
|
|
|
|
|
punpckhdq m0, m0 |
|
|
|
|
|
|
|
punpckhdq m1, m1 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
mova [r0+r1*0+8], m0 |
|
|
|
mova [r0+r1*0+8], m0 |
|
|
|
mova [r0+r1*1+8], m1 |
|
|
|
mova [r0+r1*1+8], m1 |
|
|
|
%endif |
|
|
|
%endif |
|
|
@ -162,18 +155,8 @@ cglobal pred16x16_dc, 2,7 |
|
|
|
shr r2d, 5 |
|
|
|
shr r2d, 5 |
|
|
|
%if cpuflag(ssse3) |
|
|
|
%if cpuflag(ssse3) |
|
|
|
pxor m1, m1 |
|
|
|
pxor m1, m1 |
|
|
|
movd m0, r2d |
|
|
|
|
|
|
|
pshufb m0, m1 |
|
|
|
|
|
|
|
%elif cpuflag(sse2) |
|
|
|
|
|
|
|
movd m0, r2d |
|
|
|
|
|
|
|
punpcklbw m0, m0 |
|
|
|
|
|
|
|
pshuflw m0, m0, 0 |
|
|
|
|
|
|
|
punpcklqdq m0, m0 |
|
|
|
|
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
movd m0, r2d |
|
|
|
|
|
|
|
punpcklbw m0, m0 |
|
|
|
|
|
|
|
pshufw m0, m0, 0 |
|
|
|
|
|
|
|
%endif |
|
|
|
%endif |
|
|
|
|
|
|
|
SPLATB_REG m0, r2d, m1 |
|
|
|
|
|
|
|
|
|
|
|
%if mmsize==8 |
|
|
|
%if mmsize==8 |
|
|
|
mov r3d, 8 |
|
|
|
mov r3d, 8 |
|
|
@ -227,12 +210,7 @@ cglobal pred16x16_tm_vp8, 2,5 |
|
|
|
movzx r2d, byte [r0+r1-1] |
|
|
|
movzx r2d, byte [r0+r1-1] |
|
|
|
sub r2d, r3d |
|
|
|
sub r2d, r3d |
|
|
|
movd mm4, r2d |
|
|
|
movd mm4, r2d |
|
|
|
%if cpuflag(mmx2) |
|
|
|
SPLATW mm4, mm4, 0 |
|
|
|
pshufw mm4, mm4, 0 |
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
punpcklwd mm4, mm4 |
|
|
|
|
|
|
|
punpckldq mm4, mm4 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
movq mm5, mm4 |
|
|
|
movq mm5, mm4 |
|
|
|
movq mm6, mm4 |
|
|
|
movq mm6, mm4 |
|
|
|
movq mm7, mm4 |
|
|
|
movq mm7, mm4 |
|
|
@ -332,19 +310,15 @@ cglobal pred16x16_plane_%1, 2,9,7 |
|
|
|
movhlps m1, m0 |
|
|
|
movhlps m1, m0 |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
paddw m0, m1 |
|
|
|
paddw m0, m1 |
|
|
|
%if cpuflag(sse2) |
|
|
|
%if cpuflag(mmx2) |
|
|
|
pshuflw m1, m0, 0xE |
|
|
|
PSHUFLW m1, m0, 0xE |
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m1, m0, 0xE |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
%elif cpuflag(mmx) |
|
|
|
mova m1, m0 |
|
|
|
mova m1, m0 |
|
|
|
psrlq m1, 32 |
|
|
|
psrlq m1, 32 |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
paddw m0, m1 |
|
|
|
paddw m0, m1 |
|
|
|
%if cpuflag(sse2) |
|
|
|
%if cpuflag(mmx2) |
|
|
|
pshuflw m1, m0, 0x1 |
|
|
|
PSHUFLW m1, m0, 0x1 |
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m1, m0, 0x1 |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
%elif cpuflag(mmx) |
|
|
|
mova m1, m0 |
|
|
|
mova m1, m0 |
|
|
|
psrlq m1, 16 |
|
|
|
psrlq m1, 16 |
|
|
@ -483,25 +457,9 @@ cglobal pred16x16_plane_%1, 2,9,7 |
|
|
|
|
|
|
|
|
|
|
|
movd m1, r5d |
|
|
|
movd m1, r5d |
|
|
|
movd m3, r3d |
|
|
|
movd m3, r3d |
|
|
|
%if cpuflag(sse2) |
|
|
|
SPLATW m0, m0, 0 ; H |
|
|
|
pshuflw m0, m0, 0x0 |
|
|
|
SPLATW m1, m1, 0 ; V |
|
|
|
pshuflw m1, m1, 0x0 |
|
|
|
SPLATW m3, m3, 0 ; a |
|
|
|
pshuflw m3, m3, 0x0 |
|
|
|
|
|
|
|
punpcklqdq m0, m0 ; splat H (words) |
|
|
|
|
|
|
|
punpcklqdq m1, m1 ; splat V (words) |
|
|
|
|
|
|
|
punpcklqdq m3, m3 ; splat a (words) |
|
|
|
|
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m0, m0, 0x0 |
|
|
|
|
|
|
|
pshufw m1, m1, 0x0 |
|
|
|
|
|
|
|
pshufw m3, m3, 0x0 |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
|
|
|
|
punpcklwd m0, m0 |
|
|
|
|
|
|
|
punpcklwd m1, m1 |
|
|
|
|
|
|
|
punpcklwd m3, m3 |
|
|
|
|
|
|
|
punpckldq m0, m0 |
|
|
|
|
|
|
|
punpckldq m1, m1 |
|
|
|
|
|
|
|
punpckldq m3, m3 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
%ifidn %1, svq3 |
|
|
|
%ifidn %1, svq3 |
|
|
|
SWAP 0, 1 |
|
|
|
SWAP 0, 1 |
|
|
|
%endif |
|
|
|
%endif |
|
|
@ -626,10 +584,8 @@ cglobal pred8x8_plane, 2,9,7 |
|
|
|
paddw m0, m1 |
|
|
|
paddw m0, m1 |
|
|
|
|
|
|
|
|
|
|
|
%if notcpuflag(ssse3) |
|
|
|
%if notcpuflag(ssse3) |
|
|
|
%if cpuflag(sse2) ; mmsize == 16 |
|
|
|
%if cpuflag(mmx2) |
|
|
|
pshuflw m1, m0, 0xE |
|
|
|
PSHUFLW m1, m0, 0xE |
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m1, m0, 0xE |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
%elif cpuflag(mmx) |
|
|
|
mova m1, m0 |
|
|
|
mova m1, m0 |
|
|
|
psrlq m1, 32 |
|
|
|
psrlq m1, 32 |
|
|
@ -637,10 +593,8 @@ cglobal pred8x8_plane, 2,9,7 |
|
|
|
paddw m0, m1 |
|
|
|
paddw m0, m1 |
|
|
|
%endif ; !ssse3 |
|
|
|
%endif ; !ssse3 |
|
|
|
|
|
|
|
|
|
|
|
%if cpuflag(sse2) |
|
|
|
%if cpuflag(mmx2) |
|
|
|
pshuflw m1, m0, 0x1 |
|
|
|
PSHUFLW m1, m0, 0x1 |
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m1, m0, 0x1 |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
%elif cpuflag(mmx) |
|
|
|
mova m1, m0 |
|
|
|
mova m1, m0 |
|
|
|
psrlq m1, 16 |
|
|
|
psrlq m1, 16 |
|
|
@ -711,25 +665,9 @@ cglobal pred8x8_plane, 2,9,7 |
|
|
|
|
|
|
|
|
|
|
|
movd m1, r5d |
|
|
|
movd m1, r5d |
|
|
|
movd m3, r3d |
|
|
|
movd m3, r3d |
|
|
|
%if cpuflag(sse2) |
|
|
|
SPLATW m0, m0, 0 ; H |
|
|
|
pshuflw m0, m0, 0x0 |
|
|
|
SPLATW m1, m1, 0 ; V |
|
|
|
pshuflw m1, m1, 0x0 |
|
|
|
SPLATW m3, m3, 0 ; a |
|
|
|
pshuflw m3, m3, 0x0 |
|
|
|
|
|
|
|
punpcklqdq m0, m0 ; splat H (words) |
|
|
|
|
|
|
|
punpcklqdq m1, m1 ; splat V (words) |
|
|
|
|
|
|
|
punpcklqdq m3, m3 ; splat a (words) |
|
|
|
|
|
|
|
%elif cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m0, m0, 0x0 |
|
|
|
|
|
|
|
pshufw m1, m1, 0x0 |
|
|
|
|
|
|
|
pshufw m3, m3, 0x0 |
|
|
|
|
|
|
|
%elif cpuflag(mmx) |
|
|
|
|
|
|
|
punpcklwd m0, m0 |
|
|
|
|
|
|
|
punpcklwd m1, m1 |
|
|
|
|
|
|
|
punpcklwd m3, m3 |
|
|
|
|
|
|
|
punpckldq m0, m0 |
|
|
|
|
|
|
|
punpckldq m1, m1 |
|
|
|
|
|
|
|
punpckldq m3, m3 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
%if mmsize == 8 |
|
|
|
%if mmsize == 8 |
|
|
|
mova m2, m0 |
|
|
|
mova m2, m0 |
|
|
|
%endif |
|
|
|
%endif |
|
|
@ -815,24 +753,8 @@ cglobal pred8x8_horizontal, 2,3 |
|
|
|
mova m2, [pb_3] |
|
|
|
mova m2, [pb_3] |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
.loop: |
|
|
|
.loop: |
|
|
|
movd m0, [r0+r1*0-4] |
|
|
|
SPLATB_LOAD m0, r0+r1*0-1, m2 |
|
|
|
movd m1, [r0+r1*1-4] |
|
|
|
SPLATB_LOAD m1, r0+r1*1-1, m2 |
|
|
|
%if cpuflag(ssse3) |
|
|
|
|
|
|
|
pshufb m0, m2 |
|
|
|
|
|
|
|
pshufb m1, m2 |
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
punpcklbw m0, m0 |
|
|
|
|
|
|
|
punpcklbw m1, m1 |
|
|
|
|
|
|
|
%if cpuflag(mmx2) |
|
|
|
|
|
|
|
pshufw m0, m0, 0xff |
|
|
|
|
|
|
|
pshufw m1, m1, 0xff |
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
punpckhwd m0, m0 |
|
|
|
|
|
|
|
punpckhwd m1, m1 |
|
|
|
|
|
|
|
punpckhdq m0, m0 |
|
|
|
|
|
|
|
punpckhdq m1, m1 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
mova [r0+r1*0], m0 |
|
|
|
mova [r0+r1*0], m0 |
|
|
|
mova [r0+r1*1], m1 |
|
|
|
mova [r0+r1*1], m1 |
|
|
|
lea r0, [r0+r1*2] |
|
|
|
lea r0, [r0+r1*2] |
|
|
@ -1000,15 +922,8 @@ cglobal pred8x8_tm_vp8, 2,6 |
|
|
|
sub r3d, r4d |
|
|
|
sub r3d, r4d |
|
|
|
movd mm2, r2d |
|
|
|
movd mm2, r2d |
|
|
|
movd mm4, r3d |
|
|
|
movd mm4, r3d |
|
|
|
%if cpuflag(mmx2) |
|
|
|
SPLATW mm2, mm2, 0 |
|
|
|
pshufw mm2, mm2, 0 |
|
|
|
SPLATW mm4, mm4, 0 |
|
|
|
pshufw mm4, mm4, 0 |
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
punpcklwd mm2, mm2 |
|
|
|
|
|
|
|
punpcklwd mm4, mm4 |
|
|
|
|
|
|
|
punpckldq mm2, mm2 |
|
|
|
|
|
|
|
punpckldq mm4, mm4 |
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
movq mm3, mm2 |
|
|
|
movq mm3, mm2 |
|
|
|
movq mm5, mm4 |
|
|
|
movq mm5, mm4 |
|
|
|
paddw mm2, mm0 |
|
|
|
paddw mm2, mm0 |
|
|
|