Merge commit '307eb1a8ee363db1fcf869e427a8deb6d9538881'

* commit '307eb1a8ee363db1fcf869e427a8deb6d9538881':
  x86: vp8dsp: port FILTER_BILINEAR macro to cpuflags

Merged-by: James Almer <jamrial@gmail.com>
pull/272/head
James Almer 7 years ago
commit 53eea3a569
  1. 143
      libavcodec/x86/vp8dsp.asm

@ -664,6 +664,37 @@ INIT_XMM sse2
FILTER_V 8
%macro FILTER_BILINEAR 1
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
mova m3, [bilinear_filter_vb+myq-16]
.nextrow:
movh m0, [srcq+srcstrideq*0]
movh m1, [srcq+srcstrideq*1]
movh m2, [srcq+srcstrideq*2]
punpcklbw m0, m1
punpcklbw m1, m2
pmaddubsw m0, m3
pmaddubsw m1, m3
psraw m0, 2
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
%if mmsize==8
packuswb m0, m0
packuswb m1, m1
movh [dstq+dststrideq*0], m0
movh [dstq+dststrideq*1], m1
%else
packuswb m0, m1
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
jg .nextrow
REP_RET
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
mova m2, [filter_h2_shuf]
mova m3, [bilinear_filter_vb+mxq-16]
.nextrow:
movu m0, [srcq+srcstrideq*0]
movu m1, [srcq+srcstrideq*1]
pshufb m0, m2
pshufb m1, m2
pmaddubsw m0, m3
pmaddubsw m1, m3
psraw m0, 2
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
%if mmsize==8
packuswb m0, m0
packuswb m1, m1
movh [dstq+dststrideq*0], m0
movh [dstq+dststrideq*1], m1
%else
packuswb m0, m1
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@ -758,85 +822,10 @@ INIT_MMX mmxext
FILTER_BILINEAR 4
INIT_XMM sse2
FILTER_BILINEAR 8
%macro FILTER_BILINEAR_SSSE3 1
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
mova m3, [bilinear_filter_vb+myq-16]
.nextrow:
movh m0, [srcq+srcstrideq*0]
movh m1, [srcq+srcstrideq*1]
movh m2, [srcq+srcstrideq*2]
punpcklbw m0, m1
punpcklbw m1, m2
pmaddubsw m0, m3
pmaddubsw m1, m3
psraw m0, 2
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
%if mmsize==8
packuswb m0, m0
packuswb m1, m1
movh [dstq+dststrideq*0], m0
movh [dstq+dststrideq*1], m1
%else
packuswb m0, m1
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
sub heightd, 2
jg .nextrow
REP_RET
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
mova m2, [filter_h2_shuf]
mova m3, [bilinear_filter_vb+mxq-16]
.nextrow:
movu m0, [srcq+srcstrideq*0]
movu m1, [srcq+srcstrideq*1]
pshufb m0, m2
pshufb m1, m2
pmaddubsw m0, m3
pmaddubsw m1, m3
psraw m0, 2
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
%if mmsize==8
packuswb m0, m0
packuswb m1, m1
movh [dstq+dststrideq*0], m0
movh [dstq+dststrideq*1], m1
%else
packuswb m0, m1
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
sub heightd, 2
jg .nextrow
REP_RET
%endmacro
INIT_MMX ssse3
FILTER_BILINEAR_SSSE3 4
FILTER_BILINEAR 4
INIT_XMM ssse3
FILTER_BILINEAR_SSSE3 8
FILTER_BILINEAR 8
INIT_MMX mmx
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height

Loading…
Cancel
Save