Merge pull request #12826 from alalek:issue_8413

pull/12842/head
Alexander Alekhin 6 years ago
commit 78b51fb574
  1. 58
      3rdparty/carotene/src/div.cpp
  2. 9
      modules/core/include/opencv2/core.hpp
  3. 26
      modules/core/src/arithm.cpp
  4. 14
      modules/core/src/arithm_core.hpp
  5. 16
      modules/core/src/arithm_simd.hpp
  6. 4
      modules/core/src/convert_scale.cpp
  7. 25
      modules/core/src/mathfuncs.cpp
  8. 4
      modules/core/src/matmul.cpp
  9. 53
      modules/core/src/opencl/arithm.cl
  10. 127
      modules/core/test/test_arithm.cpp
  11. 7
      modules/gapi/test/common/gapi_core_tests_inl.hpp

@ -151,6 +151,10 @@ void div(const Size2D &size,
typedef typename internal::VecTraits<T>::vec128 vec128; typedef typename internal::VecTraits<T>::vec128 vec128;
typedef typename internal::VecTraits<T>::vec64 vec64; typedef typename internal::VecTraits<T>::vec64 vec64;
#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
static_assert(std::numeric_limits<T>::is_integer, "template implementation is for integer types only");
#endif
if (scale == 0.0f || if (scale == 0.0f ||
(std::numeric_limits<T>::is_integer && (std::numeric_limits<T>::is_integer &&
(scale * std::numeric_limits<T>::max()) < 1.0f && (scale * std::numeric_limits<T>::max()) < 1.0f &&
@ -311,6 +315,10 @@ void recip(const Size2D &size,
typedef typename internal::VecTraits<T>::vec128 vec128; typedef typename internal::VecTraits<T>::vec128 vec128;
typedef typename internal::VecTraits<T>::vec64 vec64; typedef typename internal::VecTraits<T>::vec64 vec64;
#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
static_assert(std::numeric_limits<T>::is_integer, "template implementation is for integer types only");
#endif
if (scale == 0.0f || if (scale == 0.0f ||
(std::numeric_limits<T>::is_integer && (std::numeric_limits<T>::is_integer &&
scale < 1.0f && scale < 1.0f &&
@ -463,8 +471,6 @@ void div(const Size2D &size,
return; return;
} }
float32x4_t v_zero = vdupq_n_f32(0.0f);
size_t roiw128 = size.width >= 3 ? size.width - 3 : 0; size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
size_t roiw64 = size.width >= 1 ? size.width - 1 : 0; size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
@ -485,9 +491,7 @@ void div(const Size2D &size,
float32x4_t v_src0 = vld1q_f32(src0 + j); float32x4_t v_src0 = vld1q_f32(src0 + j);
float32x4_t v_src1 = vld1q_f32(src1 + j); float32x4_t v_src1 = vld1q_f32(src1 + j);
uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); vst1q_f32(dst + j, vmulq_f32(v_src0, internal::vrecpq_f32(v_src1)));
vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
vreinterpretq_u32_f32(vmulq_f32(v_src0, internal::vrecpq_f32(v_src1))), v_mask)));
} }
for (; j < roiw64; j += 2) for (; j < roiw64; j += 2)
@ -495,14 +499,12 @@ void div(const Size2D &size,
float32x2_t v_src0 = vld1_f32(src0 + j); float32x2_t v_src0 = vld1_f32(src0 + j);
float32x2_t v_src1 = vld1_f32(src1 + j); float32x2_t v_src1 = vld1_f32(src1 + j);
uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); vst1_f32(dst + j, vmul_f32(v_src0, internal::vrecp_f32(v_src1)));
vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
vreinterpret_u32_f32(vmul_f32(v_src0, internal::vrecp_f32(v_src1))), v_mask)));
} }
for (; j < size.width; j++) for (; j < size.width; j++)
{ {
dst[j] = src1[j] ? src0[j] / src1[j] : 0.0f; dst[j] = src0[j] / src1[j];
} }
} }
} }
@ -523,10 +525,8 @@ void div(const Size2D &size,
float32x4_t v_src0 = vld1q_f32(src0 + j); float32x4_t v_src0 = vld1q_f32(src0 + j);
float32x4_t v_src1 = vld1q_f32(src1 + j); float32x4_t v_src1 = vld1q_f32(src1 + j);
uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); vst1q_f32(dst + j, vmulq_f32(vmulq_n_f32(v_src0, scale),
vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32( internal::vrecpq_f32(v_src1)));
vreinterpretq_u32_f32(vmulq_f32(vmulq_n_f32(v_src0, scale),
internal::vrecpq_f32(v_src1))), v_mask)));
} }
for (; j < roiw64; j += 2) for (; j < roiw64; j += 2)
@ -534,15 +534,13 @@ void div(const Size2D &size,
float32x2_t v_src0 = vld1_f32(src0 + j); float32x2_t v_src0 = vld1_f32(src0 + j);
float32x2_t v_src1 = vld1_f32(src1 + j); float32x2_t v_src1 = vld1_f32(src1 + j);
uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); vst1_f32(dst + j, vmul_f32(vmul_n_f32(v_src0, scale),
vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32( internal::vrecp_f32(v_src1)));
vreinterpret_u32_f32(vmul_f32(vmul_n_f32(v_src0, scale),
internal::vrecp_f32(v_src1))), v_mask)));
} }
for (; j < size.width; j++) for (; j < size.width; j++)
{ {
dst[j] = src1[j] ? src0[j] * scale / src1[j] : 0.0f; dst[j] = src0[j] * scale / src1[j];
} }
} }
} }
@ -620,8 +618,6 @@ void reciprocal(const Size2D &size,
return; return;
} }
float32x4_t v_zero = vdupq_n_f32(0.0f);
size_t roiw128 = size.width >= 3 ? size.width - 3 : 0; size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
size_t roiw64 = size.width >= 1 ? size.width - 1 : 0; size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
@ -639,23 +635,19 @@ void reciprocal(const Size2D &size,
float32x4_t v_src1 = vld1q_f32(src1 + j); float32x4_t v_src1 = vld1q_f32(src1 + j);
uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); vst1q_f32(dst + j, internal::vrecpq_f32(v_src1));
vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
vreinterpretq_u32_f32(internal::vrecpq_f32(v_src1)), v_mask)));
} }
for (; j < roiw64; j += 2) for (; j < roiw64; j += 2)
{ {
float32x2_t v_src1 = vld1_f32(src1 + j); float32x2_t v_src1 = vld1_f32(src1 + j);
uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); vst1_f32(dst + j, internal::vrecp_f32(v_src1));
vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
vreinterpret_u32_f32(internal::vrecp_f32(v_src1)), v_mask)));
} }
for (; j < size.width; j++) for (; j < size.width; j++)
{ {
dst[j] = src1[j] ? 1.0f / src1[j] : 0; dst[j] = 1.0f / src1[j];
} }
} }
} }
@ -673,25 +665,19 @@ void reciprocal(const Size2D &size,
float32x4_t v_src1 = vld1q_f32(src1 + j); float32x4_t v_src1 = vld1q_f32(src1 + j);
uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); vst1q_f32(dst + j, vmulq_n_f32(internal::vrecpq_f32(v_src1), scale));
vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
vreinterpretq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(v_src1),
scale)),v_mask)));
} }
for (; j < roiw64; j += 2) for (; j < roiw64; j += 2)
{ {
float32x2_t v_src1 = vld1_f32(src1 + j); float32x2_t v_src1 = vld1_f32(src1 + j);
uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); vst1_f32(dst + j, vmul_n_f32(internal::vrecp_f32(v_src1), scale));
vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
vreinterpret_u32_f32(vmul_n_f32(internal::vrecp_f32(v_src1),
scale)), v_mask)));
} }
for (; j < size.width; j++) for (; j < size.width; j++)
{ {
dst[j] = src1[j] ? scale / src1[j] : 0; dst[j] = scale / src1[j];
} }
} }
} }

@ -415,8 +415,13 @@ The function cv::divide divides one array by another:
or a scalar by an array when there is no src1 : or a scalar by an array when there is no src1 :
\f[\texttt{dst(I) = saturate(scale/src2(I))}\f] \f[\texttt{dst(I) = saturate(scale/src2(I))}\f]
When src2(I) is zero, dst(I) will also be zero. Different channels of Different channels of multi-channel arrays are processed independently.
multi-channel arrays are processed independently.
For integer types when src2(I) is zero, dst(I) will also be zero.
@note In case of floating-point data, no special behavior is defined for zero src2(I) values.
Regular floating-point division is used.
Expect correct IEEE-754 behavior for floating-point data (with NaN and Inf result values).
@note Saturation is not applied when the output array has the depth CV_32S. You may even get @note Saturation is not applied when the output array has the depth CV_32S. You may even get
result of an incorrect sign in the case of overflow. result of an incorrect sign in the case of overflow.

@ -105,14 +105,18 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int scalarcn = kercn == 3 ? 4 : kercn; int scalarcn = kercn == 3 ? 4 : kercn;
int rowsPerWI = d.isIntel() ? 4 : 1; int rowsPerWI = d.isIntel() ? 4 : 1;
sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", const int dstDepth = srcdepth;
const int dstType = CV_MAKETYPE(dstDepth, kercn);
const int dstType1 = CV_MAKETYPE(dstDepth, 1);
const int scalarType = CV_MAKETYPE(srcdepth, scalarcn);
sprintf(opts, "-D %s%s -D %s%s -D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop], haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "", bitwise ? ocl::memopTypeToStr(dstType) : ocl::typeToStr(dstType),
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) : dstDepth,
ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)), bitwise ? ocl::memopTypeToStr(dstType1) : ocl::typeToStr(dstType1),
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) : bitwise ? ocl::memopTypeToStr(scalarType) : ocl::typeToStr(scalarType),
ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
kercn, rowsPerWI); kercn, rowsPerWI);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
@ -501,12 +505,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
char cvtstr[4][32], opts[1024]; char cvtstr[4][32], opts[1024];
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s " sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " "-D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s", "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ddepth,
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
ocl::typeToStr(wdepth), wdepth, ocl::typeToStr(wdepth), wdepth,
@ -1099,12 +1103,12 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
char cvt[40]; char cvt[40];
String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" String opts = format("-D %s -D srcT1=%s -D dstT=%s -D DEPTH_dst=%d -D workT=srcT1 -D cn=%d"
" -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s" " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
" -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s", " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
haveScalar ? "UNARY_OP" : "BINARY_OP", haveScalar ? "UNARY_OP" : "BINARY_OP",
ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)), ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
ocl::typeToStr(CV_8UC(kercn)), kercn, ocl::typeToStr(CV_8UC(kercn)), CV_8U, kercn,
ocl::convertTypeStr(depth1, CV_8U, kercn, cvt), ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
operationMap[op], ocl::typeToStr(depth1), operationMap[op], ocl::typeToStr(depth1),
ocl::typeToStr(depth1), ocl::typeToStr(CV_8U), ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),

@ -516,7 +516,10 @@ div_i( const T* src1, size_t step1, const T* src2, size_t step2,
for( ; i < width; i++ ) for( ; i < width; i++ )
{ {
T num = src1[i], denom = src2[i]; T num = src1[i], denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(num*scale_f/denom) : (T)0; T v = 0;
if (denom != 0)
v = saturate_cast<T>(num*scale_f/denom);
dst[i] = v;
} }
} }
} }
@ -538,7 +541,7 @@ div_f( const T* src1, size_t step1, const T* src2, size_t step2,
for( ; i < width; i++ ) for( ; i < width; i++ )
{ {
T num = src1[i], denom = src2[i]; T num = src1[i], denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(num*scale_f/denom) : (T)0; dst[i] = saturate_cast<T>(num*scale_f/denom);
} }
} }
} }
@ -559,7 +562,10 @@ recip_i( const T* src2, size_t step2,
for( ; i < width; i++ ) for( ; i < width; i++ )
{ {
T denom = src2[i]; T denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(scale_f/denom) : (T)0; T v = 0;
if (denom != 0)
v = saturate_cast<T>(scale_f/denom);
dst[i] = v;
} }
} }
} }
@ -580,7 +586,7 @@ recip_f( const T* src2, size_t step2,
for( ; i < width; i++ ) for( ; i < width; i++ )
{ {
T denom = src2[i]; T denom = src2[i];
dst[i] = denom != 0 ? saturate_cast<T>(scale_f/denom) : (T)0; dst[i] = saturate_cast<T>(scale_f/denom);
} }
} }
} }

@ -1433,7 +1433,6 @@ struct Div_SIMD<float>
return x; return x;
v_float32x4 v_scale = v_setall_f32((float)scale); v_float32x4 v_scale = v_setall_f32((float)scale);
v_float32x4 v_zero = v_setzero_f32();
for ( ; x <= width - 8; x += 8) for ( ; x <= width - 8; x += 8)
{ {
@ -1445,9 +1444,6 @@ struct Div_SIMD<float>
v_float32x4 res0 = f0 * v_scale / f2; v_float32x4 res0 = f0 * v_scale / f2;
v_float32x4 res1 = f1 * v_scale / f3; v_float32x4 res1 = f1 * v_scale / f3;
res0 = v_select(f2 == v_zero, v_zero, res0);
res1 = v_select(f3 == v_zero, v_zero, res1);
v_store(dst + x, res0); v_store(dst + x, res0);
v_store(dst + x + 4, res1); v_store(dst + x + 4, res1);
} }
@ -1675,7 +1671,6 @@ struct Recip_SIMD<float>
return x; return x;
v_float32x4 v_scale = v_setall_f32((float)scale); v_float32x4 v_scale = v_setall_f32((float)scale);
v_float32x4 v_zero = v_setzero_f32();
for ( ; x <= width - 8; x += 8) for ( ; x <= width - 8; x += 8)
{ {
@ -1685,9 +1680,6 @@ struct Recip_SIMD<float>
v_float32x4 res0 = v_scale / f0; v_float32x4 res0 = v_scale / f0;
v_float32x4 res1 = v_scale / f1; v_float32x4 res1 = v_scale / f1;
res0 = v_select(f0 == v_zero, v_zero, res0);
res1 = v_select(f1 == v_zero, v_zero, res1);
v_store(dst + x, res0); v_store(dst + x, res0);
v_store(dst + x + 4, res1); v_store(dst + x + 4, res1);
} }
@ -1712,7 +1704,6 @@ struct Div_SIMD<double>
return x; return x;
v_float64x2 v_scale = v_setall_f64(scale); v_float64x2 v_scale = v_setall_f64(scale);
v_float64x2 v_zero = v_setzero_f64();
for ( ; x <= width - 4; x += 4) for ( ; x <= width - 4; x += 4)
{ {
@ -1724,9 +1715,6 @@ struct Div_SIMD<double>
v_float64x2 res0 = f0 * v_scale / f2; v_float64x2 res0 = f0 * v_scale / f2;
v_float64x2 res1 = f1 * v_scale / f3; v_float64x2 res1 = f1 * v_scale / f3;
res0 = v_select(f2 == v_zero, v_zero, res0);
res1 = v_select(f3 == v_zero, v_zero, res1);
v_store(dst + x, res0); v_store(dst + x, res0);
v_store(dst + x + 2, res1); v_store(dst + x + 2, res1);
} }
@ -1749,7 +1737,6 @@ struct Recip_SIMD<double>
return x; return x;
v_float64x2 v_scale = v_setall_f64(scale); v_float64x2 v_scale = v_setall_f64(scale);
v_float64x2 v_zero = v_setzero_f64();
for ( ; x <= width - 4; x += 4) for ( ; x <= width - 4; x += 4)
{ {
@ -1759,9 +1746,6 @@ struct Recip_SIMD<double>
v_float64x2 res0 = v_scale / f0; v_float64x2 res0 = v_scale / f0;
v_float64x2 res1 = v_scale / f1; v_float64x2 res1 = v_scale / f1;
res0 = v_select(f0 == v_zero, v_zero, res0);
res1 = v_select(f1 == v_zero, v_zero, res1);
v_store(dst + x, res0); v_store(dst + x, res0);
v_store(dst + x + 2, res1); v_store(dst + x + 2, res1);
} }

@ -375,10 +375,10 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
int rowsPerWI = d.isIntel() ? 4 : 1; int rowsPerWI = d.isIntel() ? 4 : 1;
char cvt[2][50]; char cvt[2][50];
int wdepth = std::max(depth, CV_32F); int wdepth = std::max(depth, CV_32F);
String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s" " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s"
" -D workT1=%s -D rowsPerWI=%d%s", " -D workT1=%s -D rowsPerWI=%d%s",
ocl::typeToStr(CV_8UC(kercn)), ocl::typeToStr(CV_8UC(kercn)), CV_8U,
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth, ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth,
ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),

@ -71,8 +71,8 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
int rowsPerWI = d.isIntel() ? 4 : 1; int rowsPerWI = d.isIntel() ? 4 : 1;
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", format("-D %s -D %s -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI, oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, rowsPerWI,
double_support ? " -D DOUBLE_SUPPORT" : "")); double_support ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
@ -238,9 +238,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
return false; return false;
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s", format("-D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth,
depth, rowsPerWI, angleInDegrees ? "AD" : "AR", rowsPerWI, angleInDegrees ? "AD" : "AR",
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
@ -474,9 +474,10 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
return false; return false;
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s", format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D BINARY_OP -D OP_PTC_%s%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI, ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth,
depth, angleInDegrees ? "AD" : "AR", rowsPerWI,
angleInDegrees ? "AD" : "AR",
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
@ -1169,8 +1170,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
ocl::typeToStr(depth), depth, rowsPerWI, op, ocl::typeToStr(depth), depth, rowsPerWI, op,
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
@ -1560,8 +1561,8 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value )
{ {
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d", format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d",
rowsPerWI)); CV_32F, rowsPerWI));
if (k.empty()) if (k.empty())
return false; return false;

@ -2375,10 +2375,10 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
char cvt[2][50]; char cvt[2][50];
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s" format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D workT=%s -D convertToWT1=%s"
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s" " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s"
" -D wdepth=%d%s -D rowsPerWI=%d", " -D wdepth=%d%s -D rowsPerWI=%d",
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth,
ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]), ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]),

@ -71,7 +71,30 @@
#pragma OPENCL FP_FAST_FMA ON #pragma OPENCL FP_FAST_FMA ON
#endif #endif
#if depth <= 5 #if !defined(DEPTH_dst)
#error "Kernel configuration error: DEPTH_dst value is required"
#elif !(DEPTH_dst >= 0 && DEPTH_dst <= 7)
#error "Kernel configuration error: invalid DEPTH_dst value"
#endif
#if defined(depth)
#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead"
#endif
#if DEPTH_dst < 5 /* CV_32F */
#define CV_DST_TYPE_IS_INTEGER
#else
#define CV_DST_TYPE_IS_FP
#endif
#if DEPTH_dst != 6 /* CV_64F */
#define CV_DST_TYPE_FIT_32F 1
#else
#define CV_DST_TYPE_FIT_32F 0
#endif
#if CV_DST_TYPE_FIT_32F
#define CV_PI M_PI_F #define CV_PI M_PI_F
#else #else
#define CV_PI M_PI #define CV_PI M_PI
@ -204,9 +227,15 @@
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2)) #define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
#elif defined OP_DIV #elif defined OP_DIV
#ifdef CV_DST_TYPE_IS_INTEGER
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \ workT e2 = srcelem2, zero = (workT)(0); \
storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero)) storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
#else
#define PROCESS_ELEM \
workT e2 = srcelem2; \
storedst(convertToDT(srcelem1 / e2))
#endif
#elif defined OP_DIV_SCALE #elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
@ -217,9 +246,15 @@
#else #else
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#endif #endif
#ifdef CV_DST_TYPE_IS_INTEGER
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \ workT e2 = srcelem2, zero = (workT)(0); \
storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2))) storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
#else
#define PROCESS_ELEM \
workT e2 = srcelem2; \
storedst(convertToDT(srcelem1 * (workT)(scale) / e2))
#endif
#elif defined OP_RDIV_SCALE #elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
@ -230,16 +265,28 @@
#else #else
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#endif #endif
#ifdef CV_DST_TYPE_IS_INTEGER
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \ workT e1 = srcelem1, zero = (workT)(0); \
storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1))) storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
#else
#define PROCESS_ELEM \
workT e1 = srcelem1; \
storedst(convertToDT(srcelem2 * (workT)(scale) / e1))
#endif
#elif defined OP_RECIP_SCALE #elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#ifdef CV_DST_TYPE_IS_INTEGER
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \ workT e1 = srcelem1, zero = (workT)(0); \
storedst(convertToDT(e1 != zero ? scale / e1 : zero)) storedst(convertToDT(e1 != zero ? scale / e1 : zero))
#else
#define PROCESS_ELEM \
workT e1 = srcelem1; \
storedst(convertToDT(scale / e1))
#endif
#elif defined OP_ADDW #elif defined OP_ADDW
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
@ -283,7 +330,7 @@
#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2)) #define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
#elif defined OP_SQRT #elif defined OP_SQRT
#if depth <= 5 #if CV_DST_TYPE_FIT_32F
#define PROCESS_ELEM storedst(native_sqrt(srcelem1)) #define PROCESS_ELEM storedst(native_sqrt(srcelem1))
#else #else
#define PROCESS_ELEM storedst(sqrt(srcelem1)) #define PROCESS_ELEM storedst(sqrt(srcelem1))
@ -324,7 +371,7 @@
#endif #endif
#elif defined OP_CTP_AD || defined OP_CTP_AR #elif defined OP_CTP_AD || defined OP_CTP_AR
#if depth <= 5 #if CV_DST_TYPE_FIT_32F
#define CV_EPSILON FLT_EPSILON #define CV_EPSILON FLT_EPSILON
#else #else
#define CV_EPSILON DBL_EPSILON #define CV_EPSILON DBL_EPSILON

@ -2242,4 +2242,131 @@ TEST(Core_MeanStdDev, regression_multichannel)
} }
} }
// Builds the 3x4 integer fixture for the divide tests.
// src1 rows are all 0, all 8 and all -8; every src2 row is {1, 2, 0, max(T)},
// which makes column 2 the divide-by-zero column.
template <typename T> static inline
void testDivideInitData(Mat& src1, Mat& src2)
{
    CV_StaticAssert(std::numeric_limits<T>::is_integer, "");

    const static T numerators[] = {
         0,  0,  0,  0,
         8,  8,  8,  8,
        -8, -8, -8, -8
    };
    const static T denominators[] = {
        1, 2, 0, std::numeric_limits<T>::max(),
        1, 2, 0, std::numeric_limits<T>::max(),
        1, 2, 0, std::numeric_limits<T>::max(),
    };

    // The Mat headers only wrap the static tables; copyTo() hands the caller its own copy.
    const Mat numeratorView(3, 4, traits::Type<T>::value, (void*)numerators);
    numeratorView.copyTo(src1);

    const Mat denominatorView(3, 4, traits::Type<T>::value, (void*)denominators);
    denominatorView.copyTo(src2);
}
// Builds the 3x4 floating-point fixture for the divide tests.
// src1 rows are all 0, all 8 and all -8; every src2 row is {1, 2, 0, +infinity},
// which makes column 2 the divide-by-zero column.
template <typename T> static inline
void testDivideInitDataFloat(Mat& src1, Mat& src2)
{
    CV_StaticAssert(!std::numeric_limits<T>::is_integer, "");

    const static T numerators[] = {
         0,  0,  0,  0,
         8,  8,  8,  8,
        -8, -8, -8, -8
    };
    const static T denominators[] = {
        1, 2, 0, std::numeric_limits<T>::infinity(),
        1, 2, 0, std::numeric_limits<T>::infinity(),
        1, 2, 0, std::numeric_limits<T>::infinity(),
    };

    // The Mat headers only wrap the static tables; copyTo() hands the caller its own copy.
    const Mat numeratorView(3, 4, traits::Type<T>::value, (void*)numerators);
    numeratorView.copyTo(src1);

    const Mat denominatorView(3, 4, traits::Type<T>::value, (void*)denominators);
    denominatorView.copyTo(src2);
}
// float and double bypass the integer-only primary template and use the
// floating-point fixture builder instead.
template <> inline
void testDivideInitData<float>(Mat& src1, Mat& src2)
{
    testDivideInitDataFloat<float>(src1, src2);
}

template <> inline
void testDivideInitData<double>(Mat& src1, Mat& src2)
{
    testDivideInitDataFloat<double>(src1, src2);
}
// Integer rule check: every element in column 2 of dst must be exactly 0.
// All other columns are left unchecked.
template <typename T> static inline
void testDivideChecks(const Mat& dst)
{
    ASSERT_FALSE(dst.empty());
    CV_StaticAssert(std::numeric_limits<T>::is_integer, "");

    const int checkedColumn = 2;
    for (int row = 0; row < dst.rows; row++)
    {
        for (int col = 0; col < dst.cols; col++)
        {
            if (col != checkedColumn)
                continue;
            EXPECT_EQ(0, dst.at<T>(row, col)) << "dst(" << row << ", " << col << ") = " << dst.at<T>(row, col);
        }
    }
}
// Floating-point rule check on dst:
//   - column 2, row 0 must be NaN;
//   - column 2, any other row must be infinite;
//   - every other element must be neither NaN nor infinite.
template <typename T> static inline
void testDivideChecksFP(const Mat& dst)
{
    ASSERT_FALSE(dst.empty());
    CV_StaticAssert(!std::numeric_limits<T>::is_integer, "");

    for (int row = 0; row < dst.rows; row++)
    {
        for (int col = 0; col < dst.cols; col++)
        {
            const T value = dst.at<T>(row, col);
            if (col == 2)
            {
                if (row == 0)
                {
                    EXPECT_TRUE(cvIsNaN(value)) << "dst(" << row << ", " << col << ") = " << value;
                }
                else
                {
                    EXPECT_TRUE(cvIsInf(value)) << "dst(" << row << ", " << col << ") = " << value;
                }
                continue;
            }
            EXPECT_FALSE(cvIsNaN(value)) << "dst(" << row << ", " << col << ") = " << value;
            EXPECT_FALSE(cvIsInf(value)) << "dst(" << row << ", " << col << ") = " << value;
        }
    }
}
// float and double results are validated with the NaN/Inf checker rather
// than the integer-only primary template.
template <> inline
void testDivideChecks<float>(const Mat& dst)
{
    testDivideChecksFP<float>(dst);
}

template <> inline
void testDivideChecks<double>(const Mat& dst)
{
    testDivideChecksFP<double>(dst);
}
// Runs cv::divide on the fixture for element type T and validates the result.
// isUMat selects whether the inputs are passed as Mat or copied into UMat first.
// When any check has failed, the inputs and the output are printed to stdout.
template <typename T, bool isUMat> static inline
void testDivide()
{
    Mat src1, src2;
    testDivideInitData<T>(src1, src2);
    ASSERT_FALSE(src1.empty());
    ASSERT_FALSE(src2.empty());

    Mat dst;
    if (isUMat)
    {
        UMat usrc1, usrc2, udst;
        src1.copyTo(usrc1);
        src2.copyTo(usrc2);
        cv::divide(usrc1, usrc2, udst);
        udst.copyTo(dst);
    }
    else
    {
        cv::divide(src1, src2, dst);
    }

    testDivideChecks<T>(dst);

    if (!::testing::Test::HasFailure())
        return;

    // Dump the data only on failure to help diagnose which element broke the rule.
    std::cout << "src1 = " << std::endl << src1 << std::endl;
    std::cout << "src2 = " << std::endl << src2 << std::endl;
    std::cout << "dst = " << std::endl << dst << std::endl;
}
// Divide rules per element type; each Mat case is followed by its UMat counterpart.
TEST(Core_DivideRules, type_32s)
{
    testDivide<int, false>();
}
TEST(UMat_Core_DivideRules, type_32s)
{
    testDivide<int, true>();
}
TEST(Core_DivideRules, type_16s)
{
    testDivide<short, false>();
}
TEST(UMat_Core_DivideRules, type_16s)
{
    testDivide<short, true>();
}
TEST(Core_DivideRules, type_32f)
{
    testDivide<float, false>();
}
TEST(UMat_Core_DivideRules, type_32f)
{
    testDivide<float, true>();
}
TEST(Core_DivideRules, type_64f)
{
    testDivide<double, false>();
}
TEST(UMat_Core_DivideRules, type_64f)
{
    testDivide<double, true>();
}
}} // namespace }} // namespace

@ -56,12 +56,14 @@ TEST_P(MathOpTest, MatricesAccuracyTest )
{ {
if( doReverseOp ) if( doReverseOp )
{ {
in_mat1.setTo(1, in_mat1 == 0); // avoid zeros in divide input data
out = cv::gapi::divRC(sc1, in1, scale, dtype); out = cv::gapi::divRC(sc1, in1, scale, dtype);
cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype); cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
break; break;
} }
else else
{ {
sc += Scalar(1, 1, 1, 1); // avoid zeros in divide input data
out = cv::gapi::divC(in1, sc1, scale, dtype); out = cv::gapi::divC(in1, sc1, scale, dtype);
cv::divide(in_mat1, sc, out_mat_ocv, scale, dtype); cv::divide(in_mat1, sc, out_mat_ocv, scale, dtype);
break; break;
@ -100,6 +102,7 @@ TEST_P(MathOpTest, MatricesAccuracyTest )
} }
case (DIV): case (DIV):
{ {
in_mat2.setTo(1, in_mat2 == 0); // avoid zeros in divide input data
out = cv::gapi::div(in1, in2, scale, dtype); out = cv::gapi::div(in1, in2, scale, dtype);
cv::divide(in_mat1, in_mat2, out_mat_ocv, scale, dtype); cv::divide(in_mat1, in_mat2, out_mat_ocv, scale, dtype);
break; break;
@ -187,7 +190,7 @@ TEST_P(MulDoubleTest, AccuracyTest)
EXPECT_EQ(out_mat_gapi.size(), sz_in); EXPECT_EQ(out_mat_gapi.size(), sz_in);
} }
TEST_P(DivTest, DivByZeroTest) TEST_P(DivTest, DISABLED_DivByZeroTest) // https://github.com/opencv/opencv/pull/12826
{ {
int type = 0, dtype = 0; int type = 0, dtype = 0;
cv::Size sz_in; cv::Size sz_in;
@ -217,7 +220,7 @@ TEST_P(DivTest, DivByZeroTest)
} }
} }
TEST_P(DivCTest, DivByZeroTest) TEST_P(DivCTest, DISABLED_DivByZeroTest) // https://github.com/opencv/opencv/pull/12826
{ {
int type = 0, dtype = 0; int type = 0, dtype = 0;
cv::Size sz_in; cv::Size sz_in;

Loading…
Cancel
Save