|
|
|
@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max |
|
|
|
|
; apply_noise_main: label reached after the sbr_hf_apply_noise_3 entry point
; (per the hunk header). Visible work: scale the element count to a byte
; offset, advance Y / s_m / q_filt past the end of their ranges, negate the
; count, then loop upward to zero combining q_filt*noise and s_m terms into Y.
;
; NOTE(review): this listing looks like a rendered diff with the removed and
; added lines interleaved (e.g. `shl count, 2` next to `shl countd, 2`,
; `neg count` next to `neg countq`). Presumably only one line of each
; count/countq pair belongs in the assembled file; confirm against the real
; source before assembling. Lines between the `@ ... @` markers are not shown,
; so the setup of m0, m2, m6 and m7 is not visible here.
apply_noise_main: |
|
|
|
|
; x86-32 or WIN64 path: m_max is fetched through its `m` form into kxd and
; kx's register is reused as the counter (presumably because m_max is not
; already in a register on these targets; confirm against the x86inc rules).
%if ARCH_X86_64 == 0 || WIN64 |
|
|
|
|
mov kxd, m_maxm |
|
|
|
|
%define count kxq |
|
|
|
|
DEFINE_ARGS Y, s_m, q_filt, noise, count |
|
|
|
|
; Other targets: m_max's own register serves as the counter.
%else |
|
|
|
|
%define count m_maxq |
|
|
|
|
DEFINE_ARGS Y, s_m, q_filt, noise, kx, count |
|
|
|
|
%endif |
|
|
|
|
movsxdifnidn noiseq, noised |
|
|
|
|
dec noiseq |
|
|
|
|
; count *= 4 (presumably elements -> bytes for 4-byte entries; confirm).
shl count, 2 |
|
|
|
|
shl countd, 2 |
|
|
|
|
; PIC builds take the table address into NOISE_TABLE first so it can be
; combined with noiseq in the loads below.
%ifdef PIC |
|
|
|
|
lea NOISE_TABLE, [sbr_noise_table] |
|
|
|
|
%endif |
|
|
|
|
; Advance the three pointers by the scaled count (Y by twice as much) so the
; loop can index them with a negative offset.
lea Yq, [Yq + 2*count] |
|
|
|
|
add s_mq, count |
|
|
|
|
add q_filtq, count |
|
|
|
|
lea Yq, [Yq + 2*countq] |
|
|
|
|
add s_mq, countq |
|
|
|
|
add q_filtq, countq |
|
|
|
|
; noise index *= 8 to form a byte offset into the noise table.
shl noiseq, 3 |
|
|
|
|
; m5 = 0
pxor m5, m5 |
|
|
|
|
; Negate the byte count: the loop adds mmsize per pass and exits at zero.
neg count |
|
|
|
|
neg countq |
|
|
|
|
.loop: |
|
|
|
|
mova m1, [q_filtq + count] |
|
|
|
|
mova m1, [q_filtq + countq] |
|
|
|
|
; Two vectors of noise-table data per pass, read at +1 and +2 mmsize from the
; current noise offset.
movu m3, [noiseq + NOISE_TABLE + 1*mmsize] |
|
|
|
|
movu m4, [noiseq + NOISE_TABLE + 2*mmsize] |
|
|
|
|
add noiseq, 2*mmsize |
|
|
|
@ -404,7 +404,7 @@ apply_noise_main: |
|
|
|
|
; Duplicate each low dword of m1 into an adjacent pair of lanes.
punpckldq m1, m1 |
|
|
|
|
mulps m1, m3 ; m1 = q_filt[m] * ff_sbr_noise_table[noise] |
|
|
|
|
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise] |
|
|
|
|
mova m3, [s_mq + count] |
|
|
|
|
mova m3, [s_mq + countq] |
|
|
|
|
; TODO: replace by a vpermd in AVX2 |
|
|
|
|
; Spread s_m the same way: high dwords duplicated into m4, low dwords into m3.
punpckhdq m4, m3, m3 |
|
|
|
|
punpckldq m3, m3 |
|
|
|
@ -414,15 +414,15 @@ apply_noise_main: |
|
|
|
|
mulps m4, m0 ; s_m[m] * phi_sign |
|
|
|
|
; Mask the noise products with m6/m7 (mask setup is in lines not shown here).
pand m1, m6 |
|
|
|
|
pand m2, m7 |
|
|
|
|
; m6/m7 are now reused to hold the current Y values (two vectors).
movu m6, [Yq + 2*count] |
|
|
|
|
movu m7, [Yq + 2*count + mmsize] |
|
|
|
|
movu m6, [Yq + 2*countq] |
|
|
|
|
movu m7, [Yq + 2*countq + mmsize] |
|
|
|
|
; Y += s_m term + masked noise term.
addps m3, m1 |
|
|
|
|
addps m4, m2 |
|
|
|
|
addps m6, m3 |
|
|
|
|
addps m7, m4 |
|
|
|
|
movu [Yq + 2*count], m6 |
|
|
|
|
movu [Yq + 2*count + mmsize], m7 |
|
|
|
|
add count, mmsize |
|
|
|
|
movu [Yq + 2*countq], m6 |
|
|
|
|
movu [Yq + 2*countq + mmsize], m7 |
|
|
|
|
; The add sets the flags tested by jl: keep looping while the counter is
; still negative.
add countq, mmsize |
|
|
|
|
jl .loop |
|
|
|
|
RET |
|
|
|
|
|
|
|
|
|