lavc/aarch64: add clip N macro

Signed-off-by: J. Dekker <jdek@itanimul.li>
pull/389/head
J. Dekker 2 years ago
parent 9413bdc381
commit 37cde570bc
  1. 19
      libavcodec/aarch64/hevcdsp_idct_neon.S
  2. 11
      libavcodec/aarch64/neon.S

@ -5,7 +5,7 @@
* *
* Ported from arm/hevcdsp_idct_neon.S by * Ported from arm/hevcdsp_idct_neon.S by
* Copyright (c) 2020 Reimar Döffinger * Copyright (c) 2020 Reimar Döffinger
* Copyright (c) 2020 J. Dekker * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
* *
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
@ -38,13 +38,6 @@ const trans, align=4
.short 31, 22, 13, 4 .short 31, 22, 13, 4
endconst endconst
// clip2: clamp two registers, in place, to the signed range [\min, \max].
//   in1, in2 - registers to clamp; each is both source and destination
//   min      - lower-bound operand (applied with smax)
//   max      - upper-bound operand (applied with smin)
// smin is applied after smax, so the upper bound wins if \min > \max.
// The call sites visible in this diff pass .8h vector registers for all
// four operands.
.macro clip2 in1, in2, min, max
smax \in1, \in1, \min
smax \in2, \in2, \min
smin \in1, \in1, \max
smin \in2, \in2, \max
.endm
function ff_hevc_add_residual_4x4_8_neon, export=1 function ff_hevc_add_residual_4x4_8_neon, export=1
ld1 {v0.8h-v1.8h}, [x1] ld1 {v0.8h-v1.8h}, [x1]
ld1 {v2.s}[0], [x0], x2 ld1 {v2.s}[0], [x0], x2
@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0
ld1 {v3.d}[1], [x12], x2 ld1 {v3.d}[1], [x12], x2
movi v4.8h, #0 movi v4.8h, #0
sqadd v1.8h, v1.8h, v3.8h sqadd v1.8h, v1.8h, v3.8h
clip2 v0.8h, v1.8h, v4.8h, v21.8h clip v4.8h, v21.8h, v0.8h, v1.8h
st1 {v0.d}[0], [x0], x2 st1 {v0.d}[0], [x0], x2
st1 {v0.d}[1], [x0], x2 st1 {v0.d}[1], [x0], x2
st1 {v1.d}[0], [x0], x2 st1 {v1.d}[0], [x0], x2
@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0
sqadd v0.8h, v0.8h, v2.8h sqadd v0.8h, v0.8h, v2.8h
ld1 {v3.8h}, [x12] ld1 {v3.8h}, [x12]
sqadd v1.8h, v1.8h, v3.8h sqadd v1.8h, v1.8h, v3.8h
clip2 v0.8h, v1.8h, v4.8h, v21.8h clip v4.8h, v21.8h, v0.8h, v1.8h
st1 {v0.8h}, [x0], x2 st1 {v0.8h}, [x0], x2
st1 {v1.8h}, [x12], x2 st1 {v1.8h}, [x12], x2
bne 1b bne 1b
@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0
sqadd v1.8h, v1.8h, v17.8h sqadd v1.8h, v1.8h, v17.8h
sqadd v2.8h, v2.8h, v18.8h sqadd v2.8h, v2.8h, v18.8h
sqadd v3.8h, v3.8h, v19.8h sqadd v3.8h, v3.8h, v19.8h
clip2 v0.8h, v1.8h, v20.8h, v21.8h clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
clip2 v2.8h, v3.8h, v20.8h, v21.8h
st1 {v0.8h-v1.8h}, [x0], x2 st1 {v0.8h-v1.8h}, [x0], x2
st1 {v2.8h-v3.8h}, [x12], x2 st1 {v2.8h-v3.8h}, [x12], x2
bne 1b bne 1b
@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0
sqadd v1.8h, v1.8h, v17.8h sqadd v1.8h, v1.8h, v17.8h
sqadd v2.8h, v2.8h, v18.8h sqadd v2.8h, v2.8h, v18.8h
sqadd v3.8h, v3.8h, v19.8h sqadd v3.8h, v3.8h, v19.8h
clip2 v0.8h, v1.8h, v20.8h, v21.8h clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
clip2 v2.8h, v3.8h, v20.8h, v21.8h
st1 {v0.8h-v3.8h}, [x0], x2 st1 {v0.8h-v3.8h}, [x0], x2
bne 1b bne 1b
ret ret

@ -1,6 +1,8 @@
/* /*
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
* Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
*
* FFmpeg is free software; you can redistribute it and/or * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either * License as published by the Free Software Foundation; either
@ -16,6 +18,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
// clip: clamp every register listed in \regs, in place, to the signed
// range [\min, \max].
//   min  - lower-bound operand (applied with smax)
//   max  - upper-bound operand (applied with smin)
//   regs - comma-separated register list (vararg); each entry is used as
//          both source and destination
// Note the argument order: bounds first, then the registers to clamp.
// smin is applied after smax, so the upper bound wins if \min > \max.
.macro clip min, max, regs:vararg
// Pass 1: raise every listed register to at least the lower bound.
.irp x, \regs
smax \x, \x, \min
.endr
// Pass 2: cap every listed register at the upper bound.
// NOTE(review): the two passes emit all smax before any smin, presumably to
// keep each register's dependent smax/smin pair apart - confirm before
// merging the loops.
.irp x, \regs
smin \x, \x, \max
.endr
.endm
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 .macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
trn1 \r8\().8B, \r0\().8B, \r1\().8B trn1 \r8\().8B, \r0\().8B, \r1\().8B
trn2 \r9\().8B, \r0\().8B, \r1\().8B trn2 \r9\().8B, \r0\().8B, \r1\().8B

Loading…
Cancel
Save