|
|
@ -59,14 +59,14 @@ SECTION .text |
|
|
|
%macro yuv2planeX_fn 3 |
|
|
|
%macro yuv2planeX_fn 3 |
|
|
|
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
|
%if ARCH_X86_32 |
|
|
|
%define cntr_reg r1 |
|
|
|
%define cntr_reg filterq |
|
|
|
%define movsx mov |
|
|
|
%define movsx mov |
|
|
|
%else |
|
|
|
%else |
|
|
|
%define cntr_reg r11 |
|
|
|
%define cntr_reg r11 |
|
|
|
%define movsx movsxd |
|
|
|
%define movsx movsxd |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
cglobal yuv2planeX_%1, %3, 7, %2, filter, fltsize, src, dst, w, dither, offset |
|
|
|
%if %1 == 8 || %1 == 9 || %1 == 10 |
|
|
|
%if %1 == 8 || %1 == 9 || %1 == 10 |
|
|
|
pxor m6, m6 |
|
|
|
pxor m6, m6 |
|
|
|
%endif ; %1 == 8/9/10 |
|
|
|
%endif ; %1 == 8/9/10 |
|
|
@ -81,8 +81,8 @@ cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
%endif ; x86-32 |
|
|
|
%endif ; x86-32 |
|
|
|
|
|
|
|
|
|
|
|
; create registers holding dither |
|
|
|
; create registers holding dither |
|
|
|
movq m_dith, [r5] ; dither |
|
|
|
movq m_dith, [ditherq] ; dither |
|
|
|
test r6d, r6d |
|
|
|
test offsetd, offsetd |
|
|
|
jz .no_rot |
|
|
|
jz .no_rot |
|
|
|
%if mmsize == 16 |
|
|
|
%if mmsize == 16 |
|
|
|
punpcklqdq m_dith, m_dith |
|
|
|
punpcklqdq m_dith, m_dith |
|
|
@ -146,17 +146,17 @@ cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
mova m1, [yuv2yuvX_%1_start] |
|
|
|
mova m1, [yuv2yuvX_%1_start] |
|
|
|
mova m2, m1 |
|
|
|
mova m2, m1 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
movsx cntr_reg, r1m |
|
|
|
movsx cntr_reg, fltsizem |
|
|
|
.filterloop_ %+ %%i: |
|
|
|
.filterloop_ %+ %%i: |
|
|
|
; input pixels |
|
|
|
; input pixels |
|
|
|
mov r6, [r2+gprsize*cntr_reg-2*gprsize] |
|
|
|
mov r6, [srcq+gprsize*cntr_reg-2*gprsize] |
|
|
|
%if %1 == 16 |
|
|
|
%if %1 == 16 |
|
|
|
mova m3, [r6+r5*4] |
|
|
|
mova m3, [r6+r5*4] |
|
|
|
mova m5, [r6+r5*4+mmsize] |
|
|
|
mova m5, [r6+r5*4+mmsize] |
|
|
|
%else ; %1 == 8/9/10 |
|
|
|
%else ; %1 == 8/9/10 |
|
|
|
mova m3, [r6+r5*2] |
|
|
|
mova m3, [r6+r5*2] |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
mov r6, [r2+gprsize*cntr_reg-gprsize] |
|
|
|
mov r6, [srcq+gprsize*cntr_reg-gprsize] |
|
|
|
%if %1 == 16 |
|
|
|
%if %1 == 16 |
|
|
|
mova m4, [r6+r5*4] |
|
|
|
mova m4, [r6+r5*4] |
|
|
|
mova m6, [r6+r5*4+mmsize] |
|
|
|
mova m6, [r6+r5*4+mmsize] |
|
|
@ -165,7 +165,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
|
|
|
|
|
|
|
|
; coefficients |
|
|
|
; coefficients |
|
|
|
movd m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1] |
|
|
|
movd m0, [filterq+2*cntr_reg-4] ; coeff[0], coeff[1] |
|
|
|
%if %1 == 16 |
|
|
|
%if %1 == 16 |
|
|
|
pshuflw m7, m0, 0 ; coeff[0] |
|
|
|
pshuflw m7, m0, 0 ; coeff[0] |
|
|
|
pshuflw m0, m0, 0x55 ; coeff[1] |
|
|
|
pshuflw m0, m0, 0x55 ; coeff[1] |
|
|
@ -207,7 +207,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
%if %1 == 8 |
|
|
|
%if %1 == 8 |
|
|
|
packssdw m2, m1 |
|
|
|
packssdw m2, m1 |
|
|
|
packuswb m2, m2 |
|
|
|
packuswb m2, m2 |
|
|
|
movh [r3+r5*1], m2 |
|
|
|
movh [dstq+r5*1], m2 |
|
|
|
%else ; %1 == 9/10/16 |
|
|
|
%else ; %1 == 9/10/16 |
|
|
|
%if %1 == 16 |
|
|
|
%if %1 == 16 |
|
|
|
packssdw m2, m1 |
|
|
|
packssdw m2, m1 |
|
|
@ -221,11 +221,11 @@ cglobal yuv2planeX_%1, %3, 7, %2 |
|
|
|
%endif ; mmx2/sse2/sse4/avx |
|
|
|
%endif ; mmx2/sse2/sse4/avx |
|
|
|
pminsw m2, [yuv2yuvX_%1_upper] |
|
|
|
pminsw m2, [yuv2yuvX_%1_upper] |
|
|
|
%endif ; %1 == 9/10/16 |
|
|
|
%endif ; %1 == 9/10/16 |
|
|
|
mova [r3+r5*2], m2 |
|
|
|
mova [dstq+r5*2], m2 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
%endif ; %1 == 8/9/10/16 |
|
|
|
|
|
|
|
|
|
|
|
add r5, mmsize/2 |
|
|
|
add r5, mmsize/2 |
|
|
|
sub r4d, mmsize/2 |
|
|
|
sub wd, mmsize/2 |
|
|
|
%if %1 == 8 |
|
|
|
%if %1 == 8 |
|
|
|
%assign %%i %%i+2 |
|
|
|
%assign %%i %%i+2 |
|
|
|
%endrep |
|
|
|
%endrep |
|
|
|