use 2 parms for now to identify the error

Branch: pull/26109/head
Author: Wanli (2 months ago)
Parent: 459fc55859
Commit: 86faf993a7
6 changed files:

1. modules/core/src/arithm.simd.hpp (2 changed lines)
2. modules/core/test/test_intrin_utils.hpp (4 changed lines)
3. modules/imgproc/src/color_lab.cpp (8 changed lines)
4. modules/imgproc/src/filter.simd.hpp (6 changed lines)
5. modules/imgproc/src/smooth.simd.hpp (2 changed lines)
6. modules/imgproc/src/sumpixels.simd.hpp (76 changed lines)
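Every hunk in this commit applies the same mechanical rewrite: calls to the variadic forms of OpenCV's universal intrinsics (v_add(a, b, c), v_mul(a, b, c)) and the compound operators (+=, |=) are expanded into explicit two-argument calls, so that a failing overload or miscompile can be pinned to a single binary intrinsic. A standalone sketch of the equivalence (hypothetical template, not the OpenCV implementation):

// Sketch only: a variadic v_add defined as a left fold over the binary
// overload. The commit performs this fold by hand at each call site,
// e.g. v_add(a, b, c) becomes v_add(v_add(a, b), c).
template <typename V>
V v_add(const V& a, const V& b) { return a + b; }        // binary base case

template <typename V, typename... Rest>
V v_add(const V& a, const V& b, const Rest&... rest)
{
    return v_add(v_add(a, b), rest...);                  // ((a + b) + c) + ...
}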

modules/core/src/arithm.simd.hpp
@@ -1444,7 +1444,7 @@ struct op_mul_scale
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return v_mul(v_scalar , a , b);
+        return v_mul(v_mul(v_scalar , a) , b);
     }
 #endif
     static inline T1 r(T1 a, T1 b, const T2* scalar)
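The nested form also pins the products to left-to-right grouping, which matters for bit-exact results since floating-point multiplication is not associative. A scalar equivalent of the rewritten vector body (a sketch, not the OpenCV scalar kernel):

// Sketch: per-lane math of v_mul(v_mul(v_scalar, a), b).
static inline float mul_scale_scalar(float a, float b, float scalar)
{
    return (scalar * a) * b;   // explicit (scalar * a) * b grouping
}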

modules/core/test/test_intrin_utils.hpp
@@ -488,7 +488,7 @@ template<typename R> struct TheTest
         dataA[1] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
         R a = dataA, b = dataB, c = dataC;
-        Data<R> resD = v_add(a, b), resE = v_add(a, b, c), resF = v_sub(a, b);
+        Data<R> resD = v_add(a, b), resE = v_add(v_add(a, b), c), resF = v_sub(a, b);
         for (int i = 0; i < VTraits<R>::vlanes(); ++i)
         {
             SCOPED_TRACE(cv::format("i=%d", i));
@@ -527,7 +527,7 @@ template<typename R> struct TheTest
         R a = dataA, b = dataB, c = dataC;
         Data<R> resD = v_mul(a, b);
-        Data<R> resE = v_mul(a, b, c);
+        Data<R> resE = v_mul(v_mul(a, b), c);
         for (int i = 0; i < VTraits<R>::vlanes(); ++i)
         {
             SCOPED_TRACE(cv::format("i=%d", i));

modules/imgproc/src/color_lab.cpp
@@ -2244,7 +2244,7 @@ struct Lab2RGBfloat
         }
         for(int k = 0; k < nrepeats; k++)
         {
-            yhi[k] = v_mul(fyhi[k], fyhi[k], fyhi[k]);
+            yhi[k] = v_mul(v_mul(fyhi[k], fyhi[k]), fyhi[k]);
         }
         for(int k = 0; k < nrepeats; k++)
         {
@@ -3775,9 +3775,9 @@ struct Luv2RGBinteger
         // fixing 16bit signed multiplication
         // by subtracting 2^(base_shift-1) and then adding result back
         v_int32 dummy32, fm[3];
-        v_expand(v_add(vc[0],vc[1],vc[2]), fm[0], dummy32);
-        v_expand(v_add(vc[3],vc[4],vc[5]), fm[1], dummy32);
-        v_expand(v_add(vc[6],vc[7],vc[8]), fm[2], dummy32);
+        v_expand(v_add(v_add(vc[0],vc[1]),vc[2]), fm[0], dummy32);
+        v_expand(v_add(v_add(vc[3],vc[4]),vc[5]), fm[1], dummy32);
+        v_expand(v_add(v_add(vc[6],vc[7]),vc[8]), fm[2], dummy32);
         fm[0] = v_shl(fm[0], (base_shift-1));
         fm[1] = v_shl(fm[1], (base_shift-1));
         fm[2] = v_shl(fm[2], (base_shift-1));

modules/imgproc/src/filter.simd.hpp
@@ -1780,7 +1780,7 @@ struct SymmRowSmallVec_32f
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes(), src += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(src);
-                v_store(dst + i, v_add(vx_load(src - cn), vx_load(src + cn), x , x));
+                v_store(dst + i, v_add(v_add(v_add(vx_load(src - cn), vx_load(src + cn)), x) , x));
             }
         else
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes(), src += VTraits<v_float32>::vlanes() )
@@ -2097,13 +2097,13 @@ struct SymmColumnSmallVec_32f
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(S1 + i);
-                v_store(dst + i, v_add(vx_load(S0 + i), vx_load(S2 + i), d4, x, x));
+                v_store(dst + i, v_add(v_add(v_add(v_add(vx_load(S0 + i), vx_load(S2 + i)), d4), x), x));
             }
         else
             for( ; i <= width - VTraits<v_float32>::vlanes(); i += VTraits<v_float32>::vlanes() )
             {
                 v_float32 x = vx_load(S1 + i);
-                v_store(dst + i, v_sub(v_add(vx_load(S0 + i), vx_load(S2 + i), d4), v_add(x, x)));
+                v_store(dst + i, v_sub(v_add(v_add(vx_load(S0 + i), vx_load(S2 + i)), d4), v_add(x, x)));
             }
 #endif
 }

modules/imgproc/src/smooth.simd.hpp
@@ -221,7 +221,7 @@ void hlineSmooth3N121Impl(const ET* src, int cn, const FT*, int, FT* dst, int len)
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     const int VECSZ = VTraits<VFT>::vlanes();
     for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
-        v_store((typename FT::raw_t*)dst, v_shl<(FT::fixedShift-2)>(v_add(vx_load_expand(src - cn), vx_load_expand(src + cn), v_shl<1>((vx_load_expand(src))))));
+        v_store((typename FT::raw_t*)dst, v_shl<(FT::fixedShift-2)>(v_add(v_add(vx_load_expand(src - cn), vx_load_expand(src + cn)), v_shl<1>((vx_load_expand(src))))));
 #endif
     for (; i < lencn; i++, src++, dst++)
         *dst = (FT(src[-cn])>>2) + (FT(src[cn])>>2) + (FT(src[0])>>1);
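The scalar tail above spells the kernel out: a fixed-point [1 2 1]/4 horizontal smooth. What the rewritten SIMD line computes per element, modeled with plain integers (a sketch; the FT fixed-point wrapper is omitted):

// Sketch: sum = left + right + 2*center; the /4 of the [1 2 1]/4 kernel is
// folded into the fixed-point shift, since
// (sum << (fixedShift - 2)) == (sum / 4) << fixedShift.
static inline int smooth121(const unsigned char* src, int cn, int fixedShift)
{
    int sum = src[-cn] + src[cn] + (src[0] << 1);
    return sum << (fixedShift - 2);
}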

modules/imgproc/src/sumpixels.simd.hpp
@@ -130,9 +130,9 @@ struct Integral_SIMD<uchar, int, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_expand(el8, el4l, el4h);
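The rotate-and-add ladder above is a Hillis-Steele inclusive scan across vector lanes: after the step with shift s, each lane holds the sum of up to 2s trailing elements, so log2(lanes) steps yield the running sum the integral image needs. A scalar model (a sketch, not OpenCV code; v_rotate_left<N> moves lanes to higher indices and zero-fills the vacated low lanes):

#include <array>
#include <cstdint>

// Sketch: scalar model of el8 = v_add(el8, v_rotate_left<K>(el8)) for
// K = 1, 2, 4, ... On return, v[i] holds the inclusive prefix sum v[0] + ... + v[i].
template <std::size_t Lanes>
std::array<std::int16_t, Lanes> lane_prefix_sum(std::array<std::int16_t, Lanes> v)
{
    for (std::size_t k = 1; k < Lanes; k <<= 1)
    {
        std::array<std::int16_t, Lanes> rot{};            // low lanes zero-filled
        for (std::size_t i = k; i < Lanes; ++i)
            rot[i] = v[i - k];                            // v_rotate_left<k>(v)
        for (std::size_t i = 0; i < Lanes; ++i)
            v[i] = std::int16_t(v[i] + rot[i]);           // v_add
    }
    return v;
}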
@@ -188,11 +188,11 @@ struct Integral_SIMD<uchar, int, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_expand(el8_1, el4l_1, el4h_1);
@@ -350,9 +350,9 @@ struct Integral_SIMD<uchar, int, double>
                 prev.val = _mm256_permute2x128_si256(el4h.val, el4h.val, 0x31);
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_expand(el8, el4l, el4h);
@@ -364,7 +364,7 @@ struct Integral_SIMD<uchar, int, double>
                 prev = v_combine_high(el4h, el4h);
 #else
                 v_int32 t = v_rotate_right<12>(el4h);
-                t |= v_rotate_left<4>(t);
+                t = v_or(t, v_rotate_left<4>(t));
                 prev = v_combine_low(t, t);
 #endif
 #endif
@@ -442,9 +442,9 @@ struct Integral_SIMD<uchar, float, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -501,11 +501,11 @@ struct Integral_SIMD<uchar, float, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2;
@@ -590,13 +590,13 @@ struct Integral_SIMD<uchar, float, double>
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
                 el8_3 = v_add(el8_3, v_rotate_left<2>(el8_3));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
-                el8_3 += v_rotate_left<4>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<4>(el8_3));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
-                el8_3 += v_rotate_left<8>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<8>(el8_3));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2, el4li_3, el4hi_3;
@@ -663,9 +663,9 @@ struct Integral_SIMD<uchar, float, double>
                 prev.val = _mm256_permute2f128_ps(el4h.val, el4h.val, 0x31);
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -678,7 +678,7 @@ struct Integral_SIMD<uchar, float, double>
                 prev = v_combine_high(el4h, el4h);
 #else
                 v_float32 t = v_rotate_right<12>(el4h);
-                t |= v_rotate_left<4>(t);
+                t = v_or(t, v_rotate_left<4>(t));
                 prev = v_combine_low(t, t);
 #endif
 #endif
@@ -770,9 +770,9 @@ struct Integral_SIMD<uchar, double, double>
                 el8 = v_add(el8, v_rotate_left<1>(el8));
                 el8 = v_add(el8, v_rotate_left<2>(el8));
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
@@ -843,11 +843,11 @@ struct Integral_SIMD<uchar, double, double>
                 el8_1 = v_add(el8_1, v_rotate_left<2>(el8_1));
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2;
@@ -958,13 +958,13 @@ struct Integral_SIMD<uchar, double, double>
                 el8_2 = v_add(el8_2, v_rotate_left<2>(el8_2));
                 el8_3 = v_add(el8_3, v_rotate_left<2>(el8_3));
 #if CV_SIMD_WIDTH >= 32
-                el8_1 += v_rotate_left<4>(el8_1);
-                el8_2 += v_rotate_left<4>(el8_2);
-                el8_3 += v_rotate_left<4>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<4>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<4>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<4>(el8_3));
 #if CV_SIMD_WIDTH == 64
-                el8_1 += v_rotate_left<8>(el8_1);
-                el8_2 += v_rotate_left<8>(el8_2);
-                el8_3 += v_rotate_left<8>(el8_3);
+                el8_1 = v_add(el8_1, v_rotate_left<8>(el8_1));
+                el8_2 = v_add(el8_2, v_rotate_left<8>(el8_2));
+                el8_3 = v_add(el8_3, v_rotate_left<8>(el8_3));
 #endif
 #endif
                 v_int32 el4li_1, el4hi_1, el4li_2, el4hi_2, el4li_3, el4hi_3;
@@ -1058,9 +1058,9 @@ struct Integral_SIMD<uchar, double, double>
                 prev_1.val = prev_2.val = el4hh.val;
 #else
 #if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
+                el8 = v_add(el8, v_rotate_left<4>(el8));
 #if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
+                el8 = v_add(el8, v_rotate_left<8>(el8));
 #endif
 #endif
                 v_int32 el4li, el4hi;
