use 2 parms for now to identify the error

Branch: pull/26109/head
Author: Wanli (2 months ago)
Parent: 459fc55859
Commit: 86faf993a7
6 changed files:

1. modules/core/src/arithm.simd.hpp (2 changed lines)
2. modules/core/test/test_intrin_utils.hpp (4 changed lines)
3. modules/imgproc/src/color_lab.cpp (8 changed lines)
4. modules/imgproc/src/filter.simd.hpp (6 changed lines)
5. modules/imgproc/src/smooth.simd.hpp (2 changed lines)
6. modules/imgproc/src/sumpixels.simd.hpp (76 changed lines)
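Every hunk in this commit applies the same mechanical rewrite: calls to the variadic forms of OpenCV's universal intrinsics (v_add(a, b, c), v_mul(a, b, c)) and the compound operators (+=, |=) are expanded into explicit two-argument calls, so that a failing overload or miscompile can be pinned to a single binary intrinsic. A standalone sketch of the equivalence (hypothetical template, not the OpenCV implementation):

// Sketch only: a variadic v_add defined as a left fold over the binary
// overload. The commit performs this fold by hand at each call site,
// e.g. v_add(a, b, c) becomes v_add(v_add(a, b), c).
template <typename V>
V v_add(const V& a, const V& b) { return a + b; }        // binary base case

template <typename V, typename... Rest>
V v_add(const V& a, const V& b, const Rest&... rest)
{
    return v_add(v_add(a, b), rest...);                  // ((a + b) + c) + ...
}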

modules/core/src/arithm.simd.hpp
@@ -1444,7 +1444,7 @@ struct op_mul_scale
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return v_mul(v_scalar , a , b);
+        return v_mul(v_mul(v_scalar , a) , b);
     }
 #endif
     static inline T1 r(T1 a, T1 b, const T2* scalar)
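The nested form also pins the products to left-to-right grouping, which matters for bit-exact results since floating-point multiplication is not associative. A scalar equivalent of the rewritten vector body (a sketch, not the OpenCV scalar kernel):

// Sketch: per-lane math of v_mul(v_mul(v_scalar, a), b).
static inline float mul_scale_scalar(float a, float b, float scalar)
{
    return (scalar * a) * b;   // explicit (scalar * a) * b grouping
}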

modules/core/test/test_intrin_utils.hpp
@@ -488,7 +488,7 @@ template<typename R> struct TheTest
         dataA[1] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
         R a = dataA, b = dataB, c = dataC;
-        Data<R> resD = v_add(a, b), resE = v_add(a, b, c), resF = v_sub(a, b);
+        Data<R> resD = v_add(a, b), resE = v_add(v_add(a, b), c), resF = v_sub(a, b);
         for (int i = 0; i < VTraits<R>::vlanes(); ++i)
         {
             SCOPED_TRACE(cv::format("i=%d", i));
@@ -527,7 +527,7 @@ template<typename R> struct TheTest
         R a = dataA, b = dataB, c = dataC;
         Data<R> resD = v_mul(a, b);
-        Data<R> resE = v_mul(a, b, c);
+        Data<R> resE = v_mul(v_mul(a, b), c);
         for (int i = 0; i < VTraits<R>::vlanes(); ++i)
         {
             SCOPED_TRACE(cv::format("i=%d", i));

modules/imgproc/src/color_lab.cpp
@@ -2244,7 +2244,7 @@ struct Lab2RGBfloat
         }
         for(int k = 0; k < nrepeats; k++)
         {
-            yhi[k] = v_mul(fyhi[k], fyhi[k], fyhi[k]);
+            yhi[k] = v_mul(v_mul(fyhi[k], fyhi[k]), fyhi[k]);
         }
         for(int k = 0; k < nrepeats; k++)
         {
@@ -3775,9 +3775,9 @@ struct Luv2RGBinteger
         // fixing 16bit signed multiplication
         // by subtracting 2^(base_shift-1) and then adding result back
         v_int32 dummy32, fm[3];
-        v_expand(v_add(vc[0],vc[1],vc[2]), fm[0], dummy32);
-        v_expand(v_add(vc[3],vc[4],vc[5]), fm[1], dummy32);
-        v_expand(v_add(vc[6],vc[7],vc[8]), fm[2], dummy32);
+        v_expand(v_add(v_add(vc[0],vc[1]),vc[2]), fm[0], dummy32);
+        v_expand(v_add(v_add(vc[3],vc[4]),vc[5]), fm[1], dummy32);
+        v_expand(v_add(v_add(vc[6],vc[7]),vc[8]), fm[2], dummy32);
         fm[0] = v_shl(fm[0], (base_shift-1));
         fm[1] = v_shl(fm[1], (base_shift-1));
         fm[2] = v_shl(fm[2], (base_shift-1));

modules/imgproc/src/filter.simd.hpp
@@ -1780,7 +1780,7 @@ struct SymmRowSmallVec_32f
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes(), src += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(src);
-                v_store(dst + i, v_add(vx_load(src - cn), vx_load(src + cn), x , x));
+                v_store(dst + i, v_add(v_add(v_add(vx_load(src - cn), vx_load(src + cn)), x) , x));
             }
         else
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes(), src += VTraits<v_float32>::vlanes() )
@@ -2097,13 +2097,13 @@ struct SymmColumnSmallVec_32f
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(S1 + i);
-                v_store(dst + i, v_add(vx_load(S0 + i), vx_load(S2 + i), d4, x, x));
+                v_store(dst + i, v_add(v_add(v_add(v_add(vx_load(S0 + i), vx_load(S2 + i)), d4), x), x));
             }
         else
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(S1 + i);
-                v_store(dst + i, v_sub(v_add(vx_load(S0 + i), vx_load(S2 + i), d4), v_add(x, x)));
+                v_store(dst + i, v_sub(v_add(v_add(vx_load(S0 + i), vx_load(S2 + i)), d4), v_add(x, x)));
             }
 #endif
 }

modules/imgproc/src/smooth.simd.hpp
@@ -221,7 +221,7 @@ void hlineSmooth3N121Impl(const ET* src, int cn, const FT*, int, FT* dst, int len)
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     const int VECSZ = VTraits<VFT>::vlanes();
     for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
-        v_store((typename FT::raw_t*)dst, v_shl<(FT::fixedShift-2)>(v_add(vx_load_expand(src - cn), vx_load_expand(src + cn), v_shl<1>((vx_load_expand(src))))));
+        v_store((typename FT::raw_t*)dst, v_shl<(FT::fixedShift-2)>(v_add(v_add(vx_load_expand(src - cn), vx_load_expand(src + cn)), v_shl<1>((vx_load_expand(src))))));
 #endif
     for (; i < lencn; i++, src++, dst++)
         *dst = (FT(src[-cn])>>2) + (FT(src[cn])>>2) + (FT(src[0])>>1);
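The scalar tail above spells the kernel out: a fixed-point [1 2 1]/4 horizontal smooth. What the rewritten SIMD line computes per element, modeled with plain integers (a sketch; the FT fixed-point wrapper is omitted):

// Sketch: sum = left + right + 2*center; the /4 of the [1 2 1]/4 kernel is
// folded into the fixed-point shift, since
// (sum << (fixedShift - 2)) == (sum / 4) << fixedShift.
static inline int smooth121(const unsigned char* src, int cn, int fixedShift)
{
    int sum = src[-cn] + src[cn] + (src[0] << 1);
    return sum << (fixedShift - 2);
}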

modules/imgproc/src/sumpixels.simd.hpp
@@ -130,9 +130,9 @@ struct Integral_SIMD<uchar, int, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_expand(el8, el4l, el4h);
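The rotate-and-add ladder above is a Hillis-Steele inclusive scan across vector lanes: after the step with shift s, each lane holds the sum of up to 2s trailing elements, so log2(lanes) steps yield the running sum the integral image needs. A scalar model (a sketch, not OpenCV code; v_rotate_left<N> moves lanes to higher indices and zero-fills the vacated low lanes):

#include <array>
#include <cstdint>

// Sketch: scalar model of el8 = v_add(el8, v_rotate_left<K>(el8)) for
// K = 1, 2, 4, ... On return, v[i] holds the inclusive prefix sum v[0] + ... + v[i].
template <std::size_t Lanes>
std::array<std::int16_t, Lanes> lane_prefix_sum(std::array<std::int16_t, Lanes> v)
{
    for (std::size_t k = 1; k < Lanes; k <<= 1)
    {
        std::array<std::int16_t, Lanes> rot{};            // low lanes zero-filled
        for (std::size_t i = k; i < Lanes; ++i)
            rot[i] = v[i - k];                            // v_rotate_left<k>(v)
        for (std::size_t i = 0; i < Lanes; ++i)
            v[i] = std::int16_t(v[i] + rot[i]);           // v_add
    }
    return v;
}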
@@ -188,11 +188,11 @@ struct Integral_SIMD<uchar, int, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_expand(el8_1, el4l_1, el4h_1);
@@ -350,9 +350,9 @@ struct Integral_SIMD<uchar, int, double>
                 prev.val = _mm256_permute2x128_si256(el4h.val, el4h.val, 0x31);
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_expand(el8, el4l, el4h);
@@ -364,7 +364,7 @@ struct Integral_SIMD<uchar, int, double>
                 prev = v_combine_high(el4h, el4h);
 #else
                 v_int32 t = v_rotate_right<12>(el4h);
-                t |= v_rotate_left<4>(t);
+                t = v_or(t, v_rotate_left<4>(t));
                 prev = v_combine_low(t, t);
 #endif
 #endif
@@ -442,9 +442,9 @@ struct Integral_SIMD<uchar, float, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -501,11 +501,11 @@ struct Integral_SIMD<uchar, float, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2;
@@ -590,13 +590,13 @@ struct Integral_SIMD<uchar, float, double>
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
                 el8_3 = v_add(el8_3, v_rotate_left<2>(el8_3));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
-                el8_3 += v_rotate_left<4>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<4>(el8_3));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
-                el8_3 += v_rotate_left<8>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<8>(el8_3));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2, el4li_3, el4hi_3;
@@ -663,9 +663,9 @@ struct Integral_SIMD<uchar, float, double>
                 prev.val = _mm256_permute2f128_ps(el4h.val, el4h.val, 0x31);
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -678,7 +678,7 @@ struct Integral_SIMD<uchar, float, double>
                 prev = v_combine_high(el4h, el4h);
 #else
                 v_float32 t = v_rotate_right<12>(el4h);
-                t |= v_rotate_left<4>(t);
+                t = v_or(t, v_rotate_left<4>(t));
                 prev = v_combine_low(t, t);
 #endif
 #endif
@@ -770,9 +770,9 @@ struct Integral_SIMD<uchar, double, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -843,11 +843,11 @@ struct Integral_SIMD<uchar, double, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2;
@@ -958,13 +958,13 @@ struct Integral_SIMD<uchar, double, double>
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
                 el8_3 = v_add(el8_3, v_rotate_left<2>(el8_3));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
-                el8_3 += v_rotate_left<4>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<4>(el8_3));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
-                el8_3 += v_rotate_left<8>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<8>(el8_3));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2, el4li_3, el4hi_3;
@@ -1058,9 +1058,9 @@ struct Integral_SIMD<uchar, double, double>
                 prev_1.val = prev_2.val = el4hh.val;
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
