From 72a3656e8468a394373b6397aacc906d7f7794c2 Mon Sep 17 00:00:00 2001 From: Bin Peng Date: Fri, 13 Dec 2024 22:19:47 +0800 Subject: [PATCH] lavc/aarch64: Fix ff_pred16x16_plane_neon_10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix test failure on aarch64: ./tests/checkasm/checkasm --test=h264pred 367840 Signed-off-by: Peng Bin Signed-off-by: Martin Storsjö --- libavcodec/aarch64/h264pred_neon.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S index 168f8191ad..d0999938ef 100644 --- a/libavcodec/aarch64/h264pred_neon.S +++ b/libavcodec/aarch64/h264pred_neon.S @@ -502,28 +502,27 @@ function ff_pred16x16_plane_neon_10, export=1 add v7.4h, v7.4h, v0.4h shl v2.4h, v7.4h, #4 ssubl v2.4s, v2.4h, v3.4h - shl v3.4h, v4.4h, #4 ext v0.16b, v0.16b, v0.16b, #14 - ssubl v6.4s, v5.4h, v3.4h + sxtl v6.4s, v5.4h // c mov v0.h[0], wzr mul v0.8h, v0.8h, v4.h[0] dup v16.4s, v2.s[0] dup v17.4s, v2.s[0] - dup v2.8h, v4.h[0] - dup v3.4s, v6.s[0] - shl v2.8h, v2.8h, #3 + dup v2.8h, v4.h[0] // b + dup v3.4s, v6.s[0] // c + sshll v2.4s, v2.4h, #3 // b * 8 saddw v16.4s, v16.4s, v0.4h saddw2 v17.4s, v17.4s, v0.8h - saddw v3.4s, v3.4s, v2.4h + sub v3.4s, v3.4s, v2.4s mov w3, #16 mvni v4.8h, #0xFC, lsl #8 // 1023 for clipping 1: sqshrun v0.4h, v16.4s, #5 sqshrun2 v0.8h, v17.4s, #5 - saddw v16.4s, v16.4s, v2.4h - saddw v17.4s, v17.4s, v2.4h + add v16.4s, v16.4s, v2.4s + add v17.4s, v17.4s, v2.4s sqshrun v1.4h, v16.4s, #5 sqshrun2 v1.8h, v17.4s, #5 add v16.4s, v16.4s, v3.4s