/*
 * Review notes. This is leading text placed before the first "diff --git"
 * header, so it is not part of any hunk and no hunk line count changes.
 *
 * NOTE(review): the patch text below has had its line breaks collapsed onto
 * two physical lines. The one-line-per-diff-line layout has to be restored
 * before the patch can be applied.
 *
 * Purpose: add AArch64 NEON entry points ff_vvc_put_epel_h{4,8,16,32}_8_neon
 * and install them in the VVC DSP context.
 *
 * libavcodec/aarch64/h26x/dsp.h
 *   Adds one NEON8_FNPROTO_PARTIAL_4(epel, ...) invocation with the argument
 *   list (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride,
 *   int height, const int8_t *hf, const int8_t *vf, int width).
 *   NOTE(review): the macro definition is outside this hunk; presumably it
 *   declares the four ff_vvc_put_epel_h*_8_neon symbols used below - confirm.
 *
 * libavcodec/aarch64/h26x/epel_neon.S
 *   - New macro VVC_EPEL_H_HEADER:
 *       ld1r {v30.4s}, [x4]   loads one 32-bit word from the address in x4
 *                             and replicates it into all four lanes of v30
 *                             (x4 is the fifth argument, "hf" in the
 *                             prototype above);
 *       sub  x1, x1, #1       moves the second argument ("_src") back by
 *                             one byte;
 *       mov  x10, #(VVC_MAX_PB_SIZE * 2)
 *                             sets x10 to twice VVC_MAX_PB_SIZE.
 *     NOTE(review): VVC_MAX_PB_SIZE is not defined by this patch; confirm it
 *     is already defined in epel_neon.S.
 *   - ff_vvc_put_epel_h4/h8/h16_8_neon: expand VVC_EPEL_H_HEADER, sign-extend
 *     the low eight bytes of v30 into eight 16-bit lanes of v0 (sxtl), then
 *     branch to the next local label "1" further down the file (b 1f). None
 *     of them contains a "ret"; they continue in whatever code follows that
 *     label.
 *     NOTE(review): the "1:" labels are not visible in these hunks.
 *     Presumably they sit in the ff_hevc_put_hevc_epel_h4/h8/h16_8_neon
 *     bodies that directly follow each new function - confirm that the
 *     nearest following "1:" is the intended target in all three cases.
 *   - ff_vvc_put_epel_h32_8_neon: expands VVC_EPEL_H_HEADER and branches to
 *     local label "0" (b 0f) without executing sxtl. The same hunk adds that
 *     "0:" label to ff_hevc_put_hevc_epel_h32_8_neon, immediately after its
 *     EPEL_H_HEADER expansion.
 *
 * libavcodec/aarch64/vvc/dsp_init.c
 *   In ff_vvc_dsp_init_aarch64(), assigns
 *       c->inter.put[1][1][0][1] = ff_vvc_put_epel_h4_8_neon
 *       c->inter.put[1][2][0][1] = ff_vvc_put_epel_h8_8_neon
 *       c->inter.put[1][3][0][1] = ff_vvc_put_epel_h16_8_neon
 *       c->inter.put[1][4][0][1], [1][5][0][1], [1][6][0][1]
 *                                = ff_vvc_put_epel_h32_8_neon
 *   NOTE(review): the meaning of the four table indices and any condition
 *   guarding these assignments are outside the hunk - verify against the
 *   VVCDSPContext definition and the full function body.
 */
diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index c54906dde2..6978b900fe 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -248,6 +248,9 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),) +NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, + const int8_t *hf, const int8_t *vf, int width),) + #undef NEON8_FNPROTO_PARTIAL_6 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \ void ff_vvc_put_##fn##4_8_neon##ext args; \ diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S index 8ca42a5c3a..80a0b66a52 100644 --- a/libavcodec/aarch64/h26x/epel_neon.S +++ b/libavcodec/aarch64/h26x/epel_neon.S @@ -1375,6 +1375,18 @@ endfunc mov x10, #(HEVC_MAX_PB_SIZE * 2) .endm +.macro VVC_EPEL_H_HEADER + ld1r {v30.4s}, [x4] + sub x1, x1, #1 + mov x10, #(VVC_MAX_PB_SIZE * 2) +.endm + +function ff_vvc_put_epel_h4_8_neon, export=1 + VVC_EPEL_H_HEADER + sxtl v0.8h, v30.8b + b 1f +endfunc + function ff_hevc_put_hevc_epel_h4_8_neon, export=1 EPEL_H_HEADER sxtl v0.8h, v30.8b @@ -1414,6 +1426,12 @@ function ff_hevc_put_hevc_epel_h6_8_neon, export=1 ret endfunc +function ff_vvc_put_epel_h8_8_neon, export=1 + VVC_EPEL_H_HEADER + sxtl v0.8h, v30.8b + b 1f +endfunc + function ff_hevc_put_hevc_epel_h8_8_neon, export=1 EPEL_H_HEADER sxtl v0.8h, v30.8b @@ -1461,6 +1479,12 @@ function ff_hevc_put_hevc_epel_h12_8_neon, export=1 ret endfunc +function ff_vvc_put_epel_h16_8_neon, export=1 + VVC_EPEL_H_HEADER + sxtl v0.8h, v30.8b + b 1f +endfunc + function ff_hevc_put_hevc_epel_h16_8_neon, export=1 EPEL_H_HEADER sxtl v0.8h, v30.8b @@ -1523,8 +1547,14 @@ function ff_hevc_put_hevc_epel_h24_8_neon, export=1 ret endfunc +function ff_vvc_put_epel_h32_8_neon, export=1 
+ VVC_EPEL_H_HEADER + b 0f +endfunc + function ff_hevc_put_hevc_epel_h32_8_neon, export=1 EPEL_H_HEADER +0: ld1 {v1.8b}, [x1], #8 sub x2, x2, w6, uxtw // decrement src stride mov w7, w6 // original width diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c index 714d642634..c8c13eb068 100644 --- a/libavcodec/aarch64/vvc/dsp_init.c +++ b/libavcodec/aarch64/vvc/dsp_init.c @@ -77,6 +77,13 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd) c->inter.put[0][5][1][1] = ff_vvc_put_qpel_hv64_8_neon; c->inter.put[0][6][1][1] = ff_vvc_put_qpel_hv128_8_neon; + c->inter.put[1][1][0][1] = ff_vvc_put_epel_h4_8_neon; + c->inter.put[1][2][0][1] = ff_vvc_put_epel_h8_8_neon; + c->inter.put[1][3][0][1] = ff_vvc_put_epel_h16_8_neon; + c->inter.put[1][4][0][1] = + c->inter.put[1][5][0][1] = + c->inter.put[1][6][0][1] = ff_vvc_put_epel_h32_8_neon; + c->inter.put_uni[0][1][0][0] = ff_vvc_put_pel_uni_pixels4_8_neon; c->inter.put_uni[0][2][0][0] = ff_vvc_put_pel_uni_pixels8_8_neon; c->inter.put_uni[0][3][0][0] = ff_vvc_put_pel_uni_pixels16_8_neon;