aarch64/vvc: Add put_epel_h

put_chroma_h_8_4x4_c:                                    0.2 ( 1.00x)
put_chroma_h_8_4x4_neon:                                 0.2 ( 1.00x)
put_chroma_h_8_8x8_c:                                    0.8 ( 1.00x)
put_chroma_h_8_8x8_neon:                                 0.2 ( 3.00x)
put_chroma_h_8_16x16_c:                                  3.8 ( 1.00x)
put_chroma_h_8_16x16_neon:                               0.8 ( 5.00x)
put_chroma_h_8_32x32_c:                                 12.5 ( 1.00x)
put_chroma_h_8_32x32_neon:                               2.2 ( 5.56x)
put_chroma_h_8_64x64_c:                                 47.0 ( 1.00x)
put_chroma_h_8_64x64_neon:                               8.8 ( 5.37x)
put_chroma_h_8_128x128_c:                              200.2 ( 1.00x)
put_chroma_h_8_128x128_neon:                            31.8 ( 6.31x)
release/7.1
Zhao Zhili 2 months ago committed by Nuo Mi
parent 260e1b4b62
commit 41a1885f7a
  1. 3
      libavcodec/aarch64/h26x/dsp.h
  2. 30
      libavcodec/aarch64/h26x/epel_neon.S
  3. 7
      libavcodec/aarch64/vvc/dsp_init.c

@@ -248,6 +248,9 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src
NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
const int8_t *hf, const int8_t *vf, int width),)
#undef NEON8_FNPROTO_PARTIAL_6
#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
void ff_vvc_put_##fn##4_8_neon##ext args; \

@@ -1375,6 +1375,18 @@ endfunc
mov x10, #(HEVC_MAX_PB_SIZE * 2)
.endm
// VVC_EPEL_H_HEADER: common setup used by the ff_vvc_put_epel_h*_8_neon
// entry points before they branch into the shared filter code.
// Writes v30, x1 and x10; reads x4 and x1.
// NOTE(review): going by the epel prototype (dst, _src, _srcstride, height,
// hf, vf, width) and AAPCS64 argument order, x1 is presumably the source
// pointer and x4 presumably `hf` -- confirm against the VVC declaration.
.macro VVC_EPEL_H_HEADER
ld1r {v30.4s}, [x4] // load one 32-bit word from [x4] and replicate it into all four .4s lanes of v30
sub x1, x1, #1 // move x1 back by one byte
mov x10, #(VVC_MAX_PB_SIZE * 2) // x10 = VVC_MAX_PB_SIZE * 2; presumably the destination row stride in bytes -- TODO confirm where x10 is consumed
.endm
// ff_vvc_put_epel_h4_8_neon: exported VVC entry point. It performs the VVC
// setup, widens the filter bytes, then branches forward to local label `1`;
// this function contains no filtering code and no `ret` of its own.
function ff_vvc_put_epel_h4_8_neon, export=1
VVC_EPEL_H_HEADER
sxtl v0.8h, v30.8b // v0.8h = low eight bytes of v30, sign-extended to 16 bits
b 1f // nearest following `1:` label; not visible in this excerpt, presumably inside ff_hevc_put_hevc_epel_h4_8_neon just below -- confirm
endfunc
function ff_hevc_put_hevc_epel_h4_8_neon, export=1
EPEL_H_HEADER
sxtl v0.8h, v30.8b
@@ -1414,6 +1426,12 @@ function ff_hevc_put_hevc_epel_h6_8_neon, export=1
ret
endfunc
// ff_vvc_put_epel_h8_8_neon: exported VVC entry point. It performs the VVC
// setup, widens the filter bytes, then branches forward to local label `1`;
// this function contains no filtering code and no `ret` of its own.
function ff_vvc_put_epel_h8_8_neon, export=1
VVC_EPEL_H_HEADER
sxtl v0.8h, v30.8b // v0.8h = low eight bytes of v30, sign-extended to 16 bits
b 1f // nearest following `1:` label; not visible in this excerpt, presumably inside ff_hevc_put_hevc_epel_h8_8_neon just below -- confirm
endfunc
function ff_hevc_put_hevc_epel_h8_8_neon, export=1
EPEL_H_HEADER
sxtl v0.8h, v30.8b
@@ -1461,6 +1479,12 @@ function ff_hevc_put_hevc_epel_h12_8_neon, export=1
ret
endfunc
// ff_vvc_put_epel_h16_8_neon: exported VVC entry point. It performs the VVC
// setup, widens the filter bytes, then branches forward to local label `1`;
// this function contains no filtering code and no `ret` of its own.
function ff_vvc_put_epel_h16_8_neon, export=1
VVC_EPEL_H_HEADER
sxtl v0.8h, v30.8b // v0.8h = low eight bytes of v30, sign-extended to 16 bits
b 1f // nearest following `1:` label; not visible in this excerpt, presumably inside ff_hevc_put_hevc_epel_h16_8_neon just below -- confirm
endfunc
function ff_hevc_put_hevc_epel_h16_8_neon, export=1
EPEL_H_HEADER
sxtl v0.8h, v30.8b
@@ -1523,8 +1547,14 @@ function ff_hevc_put_hevc_epel_h24_8_neon, export=1
ret
endfunc
// ff_vvc_put_epel_h32_8_neon: exported VVC entry point. It performs the VVC
// setup and then branches forward to local label `0`. Unlike the h4/h8/h16
// VVC entry points there is no sxtl here before the branch.
function ff_vvc_put_epel_h32_8_neon, export=1
VVC_EPEL_H_HEADER
b 0f // the `0:` label sits directly after EPEL_H_HEADER in ff_hevc_put_hevc_epel_h32_8_neon, so the HEVC setup is skipped and the rest of that body is shared
endfunc
function ff_hevc_put_hevc_epel_h32_8_neon, export=1
EPEL_H_HEADER
0:
ld1 {v1.8b}, [x1], #8
sub x2, x2, w6, uxtw // decrement src stride
mov w7, w6 // original width

@@ -77,6 +77,13 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
c->inter.put[0][5][1][1] = ff_vvc_put_qpel_hv64_8_neon;
c->inter.put[0][6][1][1] = ff_vvc_put_qpel_hv128_8_neon;
c->inter.put[1][1][0][1] = ff_vvc_put_epel_h4_8_neon;
c->inter.put[1][2][0][1] = ff_vvc_put_epel_h8_8_neon;
c->inter.put[1][3][0][1] = ff_vvc_put_epel_h16_8_neon;
c->inter.put[1][4][0][1] =
c->inter.put[1][5][0][1] =
c->inter.put[1][6][0][1] = ff_vvc_put_epel_h32_8_neon;
c->inter.put_uni[0][1][0][0] = ff_vvc_put_pel_uni_pixels4_8_neon;
c->inter.put_uni[0][2][0][0] = ff_vvc_put_pel_uni_pixels8_8_neon;
c->inter.put_uni[0][3][0][0] = ff_vvc_put_pel_uni_pixels16_8_neon;

Loading…
Cancel
Save