|
|
|
@ -87,6 +87,26 @@ QPEL_TABLE 12, 4, w, sse4 |
|
|
|
|
QPEL_TABLE  8, 16, b, avx2
QPEL_TABLE 10,  8, w, avx2

; Coefficient tables for the AVX-512 ICL code paths. The code below reads them
; through the symbols hevc_qpel_filters_avx512icl_h_* (horizontal pass) and
; hevc_qpel_filters_avx512icl_v_* (vertical pass).
; NOTE(review): QPEL_TABLE is defined outside this view; confirm there the
; exact layout it emits for these argument combinations.
QPEL_TABLE  8,  1, b, avx512icl_h
QPEL_TABLE  8,  1, d, avx512icl_v

; Byte indices consumed by vpermb in QPEL_H_LOAD_COMPUTE; QPEL_LOAD_SHUF loads
; them from byte offsets 0 and 32 of this 64-byte table.
; Every run of four indices selects four adjacent source bytes:
;   - first 32 bytes:  run k (k = 0..7) starts at source byte k
;   - second 32 bytes: run k (k = 0..7) starts at source byte k + 4
pb_qpel_shuffle_index:
    ; bytes 0..31
    db  0,  1,  2,  3,   1,  2,  3,  4
    db  2,  3,  4,  5,   3,  4,  5,  6
    db  4,  5,  6,  7,   5,  6,  7,  8
    db  6,  7,  8,  9,   7,  8,  9, 10
    ; bytes 32..63
    db  4,  5,  6,  7,   5,  6,  7,  8
    db  6,  7,  8,  9,   7,  8,  9, 10
    db  8,  9, 10, 11,   9, 10, 11, 12
    db 10, 11, 12, 13,  11, 12, 13, 14
|
|
|
|
|
|
|
|
|
SECTION .text

; Not referenced by any macro body visible in this excerpt.
; NOTE(review): presumably the fixed width, in samples, of one row of the
; intermediate destination buffer -- confirm against LOOP_END and the callers.
%define MAX_PB_SIZE 64
|
|
|
@ -1670,3 +1690,120 @@ HEVC_PUT_HEVC_QPEL_HV 16, 10 |
|
|
|
|
|
|
|
|
|
%endif ;AVX2 |
|
|
|
|
%endif ; ARCH_X86_64 |
|
|
|
|
|
|
|
|
|
; Select one horizontal filter entry and broadcast its two coefficient dwords.
; %1: suffix of the coefficient table symbol (hevc_qpel_filters_avx512icl_h_%1)
; %2: name of the GPR holding the filter index; overwritten with (index - 1) * 8,
;     i.e. the byte offset of an 8-byte table entry
; %3: index of the vector register receiving the dword at entry offset 0
; %4: index of the vector register receiving the dword at entry offset 4
; %5: name of a scratch GPR; only written when PIC is defined (table base)
; Side effect: (re)defines the single-line macro FILTER as the table base.
%macro QPEL_FILTER_H 5
%define %%coeffs hevc_qpel_filters_avx512icl_h_%1
%assign %%dword_step 4
    lea               %2q, [%2q*8 - 8]          ; (index - 1) * 8
%ifdef PIC
    ; An indexed access cannot be RIP-relative, so materialise the base first.
    lea               %5q, [%%coeffs]
    %define FILTER %5q
%else
    %define FILTER %%coeffs
%endif
    vpbroadcastd      m%3, [FILTER + %2q + 0*%%dword_step]
    vpbroadcastd      m%4, [FILTER + %2q + 1*%%dword_step]
%endmacro
|
|
|
|
|
|
|
|
|
; Broadcast a single vertical coefficient dword into a vector register.
; %1: not used by the macro body
; %2: name of the GPR holding the byte offset of the selected filter entry
; %3: index of the destination vector register
; %4: tap number; the dword at byte offset 4*%4 inside the entry is read
; %5: table base (a register or a symbol)
%macro QPEL_FILTER_V 5
    vpbroadcastd      m%3, [%5 + %2q + (4*%4)]
%endmacro
|
|
|
|
|
|
|
|
|
; Load the two vpermb index vectors from pb_qpel_shuffle_index.
; %1: index of the register loaded from byte offset 0 of the table
; %2: index of the register loaded from byte offset 32 of the table
%macro QPEL_LOAD_SHUF 2
%assign %%second_half 32
    movu              m%1, [pb_qpel_shuffle_index]
    movu              m%2, [pb_qpel_shuffle_index + %%second_half]
%endmacro
|
|
|
|
|
|
|
|
|
; Horizontally filter one row of source bytes into dword sums.
; Register contract (m0-m5 are reserved for this macro):
;   m0, m1: coefficient dwords, used as the second multiplicand of vpdpbusd
;   m2, m3: byte-gather indices for vpermb
;   m4, m5: scratch, overwritten on every use
; %1: dst register index (cleared, then accumulated into)
; %2: name for src (the 16 bytes starting at src - 3 are read)
%macro QPEL_H_LOAD_COMPUTE 2
    pxor              m%1, m%1                  ; accumulator starts at zero
    movu              xm4, [%2q - 3]
    vpermb            m5,  m2, m4               ; source bytes gathered via m2
    vpermb            m4,  m3, m4               ; source bytes gathered via m3
    vpdpbusd          m%1, m5, m0               ; += dot products with m0
    vpdpbusd          m%1, m4, m1               ; += dot products with m1
%endmacro
|
|
|
|
|
|
|
|
|
; Emit hevc_put_hevc_qpel_h%1_%2: horizontal-only filtering, one row per
; loop iteration.
; %1: first numeric component of the function name; also forwarded to
;     QPEL_FILTER_H as the coefficient-table suffix
; %2: second numeric component of the function name
; Vector registers: m0/m1 coefficients, m2/m3 shuffle indices, m4/m5 scratch,
; m6 row result.
%macro HEVC_PUT_HEVC_QPEL_AVX512ICL 2
cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 8, dst, src, srcstride, height, mx, tmp
    ; Loop-invariant setup; the two steps touch disjoint registers.
    QPEL_LOAD_SHUF        2, 3
    QPEL_FILTER_H         %1, mx, 0, 1, tmp
.loop:
    QPEL_H_LOAD_COMPUTE   6, src
    vpmovdw               [dstq], m6            ; truncate each dword to a word and store
    ; LOOP_END is defined outside this view.
    ; NOTE(review): expected to advance dst/src and branch back to .loop
    ; while rows remain -- confirm at its definition.
    LOOP_END              dst, src, srcstride
    RET
%endmacro
|
|
|
|
|
|
|
|
|
; Emit hevc_put_hevc_qpel_hv%1_%2: horizontal filtering followed by vertical
; filtering, one output row per loop iteration.
; %1: first numeric component of the function name; also used as the
;     coefficient-table suffix and as the number of vertical coefficients
;     loaded (%rep %1)
; %2: second numeric component of the function name
; NOTE(review): the register numbers hard-coded in the loop (m6-m13, m14-m21)
; only line up with the setup loops when %1 == 8, the sole instantiation
; visible in this excerpt.
;
; Vector register use (for %1 == 8):
;   m0,  m1    horizontal coefficients
;   m2,  m3    vpermb shuffle indices
;   m4,  m5    scratch of QPEL_H_LOAD_COMPUTE
;   m6  - m13  vertical coefficients, taps 0..7 (one broadcast dword each)
;   m14 - m21  horizontally filtered rows, oldest in m14, newest in m21
;   m22 - m26  products and accumulators
%macro HEVC_PUT_HEVC_QPEL_HV_AVX512ICL 2
cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, my, tmp
%assign %%shift 6
%assign %%extra 7
; Must be defined for both branches of the PIC conditional below: the non-PIC
; branch aliases FILTER to it, and would otherwise reference an undefined name.
%define %%table hevc_qpel_filters_avx512icl_v_%1
    QPEL_FILTER_H     %1, mx, 0, 1, tmp
    QPEL_LOAD_SHUF    2, 3
    ; Step src back by three rows.
    lea               tmpq, [srcstrideq*3]
    sub               srcq, tmpq
    ; my -> (my - 1) * 32: byte offset of a 32-byte entry in the vertical table.
    sub               myq, 1
    shl               myq, 5
%ifdef PIC
    lea               tmpq, [%%table]
    %define FILTER tmpq
%else
    %define FILTER %%table
%endif
    ; Broadcast the vertical coefficients into m6, m7, ...
%assign %%i 6
%assign %%j 0
%rep %1
    QPEL_FILTER_V     %1, my, %%i, %%j, FILTER
    %assign %%i %%i+1
    %assign %%j %%j+1
%endrep
    ; Prime the row window: horizontally filter the first %%extra rows.
%rep %%extra
    QPEL_H_LOAD_COMPUTE %%i, src
    add               srcq, srcstrideq
%assign %%i %%i+1
%endrep
.loop:
    ; Newest row goes into the last window register.
    QPEL_H_LOAD_COMPUTE %%i, src
    ; Taps 0..3
    vpmulld           m22, m14, m6
    vpmulld           m23, m15, m7
    vpmulld           m24, m16, m8
    vpmulld           m25, m17, m9
    vpaddd            m26, m22, m23
    vpaddd            m24, m25
    vpaddd            m26, m24
    ; Taps 4..7
    vpmulld           m22, m18, m10
    vpmulld           m23, m19, m11
    vpmulld           m24, m20, m12
    vpmulld           m25, m21, m13
    vpaddd            m22, m22, m23
    vpaddd            m24, m25
    vpaddd            m26, m24
    vpaddd            m22, m26                  ; m22 = sum over all eight taps
    ; Slide the row window down by one row for the next iteration.
    mova              m14, m15
    mova              m15, m16
    mova              m16, m17
    mova              m17, m18
    mova              m18, m19
    mova              m19, m20
    mova              m20, m21
    vpsrad            m22, %%shift
    vpmovdw           [dstq], m22               ; truncate each dword to a word and store
    ; LOOP_END is defined outside this view.
    ; NOTE(review): expected to advance dst/src and branch back to .loop
    ; while rows remain -- confirm at its definition.
    LOOP_END          dst, src, srcstride

    RET
%endmacro
|
|
|
|
|
|
|
|
|
; Instantiate the AVX-512 ICL functions. They are assembled only for x86-64
; and only when HAVE_AVX512ICL_EXTERNAL is non-zero. The conditions stay
; nested so the inner one is not evaluated when the outer one is false.
%if ARCH_X86_64
%if HAVE_AVX512ICL_EXTERNAL

INIT_YMM avx512icl
HEVC_PUT_HEVC_QPEL_AVX512ICL    8, 8
HEVC_PUT_HEVC_QPEL_HV_AVX512ICL 8, 8

%endif ; HAVE_AVX512ICL_EXTERNAL
%endif ; ARCH_X86_64
|
|
|
|