From 6ef357fd54a73986746733d4bd5eeb24acfc84b6 Mon Sep 17 00:00:00 2001
From: Maksim Shabunin
Date: Fri, 20 Sep 2024 13:38:59 +0300
Subject: [PATCH] build: fix AVX2/AVX512 builds failed due to intrinsics
 operator usage

---
 .../backends/fluid/gfluidimgproc_simd_avx2.hpp | 10 +++++-----
 modules/imgproc/src/resize.cpp                 | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp
index f7a502f150..990c6560c7 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp
@@ -85,15 +85,15 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
         v_deinterleave(low1, high1, s00, s01);
 
         // v_float32 res0 = s00*alpha0 + s01*alpha1;
-        v_float32x8 res0 = v_fma(s00 - s01, alpha0, s01);
+        v_float32x8 res0 = v_fma(v_sub(s00, s01), alpha0, s01);
 
         v_gather_pairs(src1[line], &mapsx[x], low2, high2);
         v_deinterleave(low2, high2, s10, s11);
 
         // v_float32 res1 = s10*alpha0 + s11*alpha1;
-        v_float32x8 res1 = v_fma(s10 - s11, alpha0, s11);
+        v_float32x8 res1 = v_fma(v_sub(s10, s11), alpha0, s11);
 
         // v_float32 d = res0*beta0 + res1*beta1;
-        v_float32x8 d = v_fma(res0 - res1, v_beta0, res1);
+        v_float32x8 d = v_fma(v_sub(res0, res1), v_beta0, res1);
         v_store(&dst[line][x], d);
     }
@@ -126,7 +126,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
         v_deinterleave(low, high, s00, s01);
 
         // v_float32 d = s00*alpha0 + s01*alpha1;
-        v_float32x8 d = v_fma(s00 - s01, alpha0, s01);
+        v_float32x8 d = v_fma(v_sub(s00, s01), alpha0, s01);
 
         v_store(&dst[line][x], d);
     }
@@ -157,7 +157,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
         v_float32x8 s1 = v256_load(&src1[line][x]);
 
         // v_float32 d = s0*beta0 + s1*beta1;
-        v_float32x8 d = v_fma(s0 - s1, v_beta0, s1);
+        v_float32x8 d = v_fma(v_sub(s0, s1), v_beta0, s1);
 
         v_store(&dst[line][x], d);
     }
diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp
index e490380f84..3750690632 100644
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -2536,7 +2536,7 @@ public:
 #elif CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
                     v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
-                    bl = t0 + t3; gl = t1 + t4; rl = t2 + t5;
+                    bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #endif
                     s0 = v_add(vx_load_expand(S0 + 6 * VTraits::vlanes()), vx_load_expand(S1 + 6 * VTraits::vlanes()));
                     s1 = v_add(vx_load_expand(S0 + 7 * VTraits::vlanes()), vx_load_expand(S1 + 7 * VTraits::vlanes()));
@@ -2556,7 +2556,7 @@ public:
 #elif CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
                     v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
-                    bh = t0 + t3; gh = t1 + t4; rh = t2 + t5;
+                    bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #endif
                     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
                 }
@@ -2643,7 +2643,7 @@ public:
                     bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-                    bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
+                    bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
 #endif
                     s0 = v_add(vx_load_expand(S0 + 6 * VTraits::vlanes()), vx_load_expand(S1 + 6 * VTraits::vlanes()));
                     s1 = v_add(vx_load_expand(S0 + 7 * VTraits::vlanes()), vx_load_expand(S1 + 7 * VTraits::vlanes()));
@@ -2659,7 +2659,7 @@ public:
                     bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-                    bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
+                    bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
 #endif
                     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
                 }
@@ -2697,7 +2697,7 @@ public:
                 v_expand(v_reinterpret_as_u16(r01), r01l, r01h);
                 v_expand(v_reinterpret_as_u16(r10), r10l, r10h);
                 v_expand(v_reinterpret_as_u16(r11), r11l, r11h);
-                v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
+                v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
             }
 #else
             for ( ; dx <= w - VTraits::vlanes(); dx += VTraits::vlanes(), S0 += VTraits::vlanes(), S1 += VTraits::vlanes(), D += VTraits::vlanes())
@@ -2773,7 +2773,7 @@ public:
                     bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-                    bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
+                    bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
 #endif
                     s0 = v_add(vx_load_expand(S0 + 6 * VTraits::vlanes()), vx_load_expand(S1 + 6 * VTraits::vlanes()));
                     s1 = v_add(vx_load_expand(S0 + 7 * VTraits::vlanes()), vx_load_expand(S1 + 7 * VTraits::vlanes()));
@@ -2789,7 +2789,7 @@ public:
                     bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
                     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-                    bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
+                    bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
 #endif
                     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
                 }
@@ -2826,7 +2826,7 @@ public:
                 v_expand(v_reinterpret_as_s16(r01), r01l, r01h);
                 v_expand(v_reinterpret_as_s16(r10), r10l, r10h);
                 v_expand(v_reinterpret_as_s16(r11), r11l, r11h);
-                v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
+                v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
 #else
                 v_int32 r0, r1, r2, r3;
                 r0 = v_add(vx_load_expand(S0), vx_load_expand(S1));
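
Note (not part of the commit): the change is purely mechanical. Every
overloaded operator on universal-intrinsics vector types (a - b, a + b)
is replaced by the equivalent function-form intrinsic (v_sub(a, b),
v_add(a, b)), which is the only spelling that compiles on build
configurations where the operator overloads are unavailable. Below is a
minimal standalone sketch of the resulting pattern; the function
blend_rows and its parameters are illustrative and not part of OpenCV.

    // Sketch: blend two rows as d = s0*beta0 + s1*(1 - beta0), rewritten
    // (as in the patch) into the fused form d = (s0 - s1)*beta0 + s1
    // using function-form universal intrinsics. Assumes an OpenCV build
    // with universal intrinsics enabled (opencv2/core/hal/intrin.hpp).
    #include <opencv2/core/hal/intrin.hpp>

    static void blend_rows(const float* src0, const float* src1,
                           float* dst, int width, float beta0)
    {
        using namespace cv;
        const v_float32 v_beta0 = vx_setall_f32(beta0);
        const int step = VTraits<v_float32>::vlanes();  // lanes per vector
        int x = 0;
        for (; x <= width - step; x += step)
        {
            v_float32 s0 = vx_load(src0 + x);
            v_float32 s1 = vx_load(src1 + x);
            // operator form was: v_float32 d = (s0 - s1) * v_beta0 + s1;
            v_float32 d = v_fma(v_sub(s0, s1), v_beta0, s1);
            v_store(dst + x, d);
        }
        for (; x < width; ++x)  // scalar tail for the remaining pixels
            dst[x] = (src0[x] - src1[x]) * beta0 + src1[x];
    }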